From owner-svn-src-head@FreeBSD.ORG Tue Aug 27 19:46:58 2013 Return-Path: Delivered-To: svn-src-head@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) (using TLSv1 with cipher ADH-AES256-SHA (256/256 bits)) (No client certificate requested) by hub.freebsd.org (Postfix) with ESMTP id 22D6D444; Tue, 27 Aug 2013 19:46:58 +0000 (UTC) (envelope-from kargl@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mx1.freebsd.org (Postfix) with ESMTPS id 0E7672DF6; Tue, 27 Aug 2013 19:46:58 +0000 (UTC) Received: from svn.freebsd.org ([127.0.1.70]) by svn.freebsd.org (8.14.7/8.14.7) with ESMTP id r7RJkvOq013318; Tue, 27 Aug 2013 19:46:57 GMT (envelope-from kargl@svn.freebsd.org) Received: (from kargl@localhost) by svn.freebsd.org (8.14.7/8.14.5/Submit) id r7RJkuON013313; Tue, 27 Aug 2013 19:46:56 GMT (envelope-from kargl@svn.freebsd.org) Message-Id: <201308271946.r7RJkuON013313@svn.freebsd.org> From: Steve Kargl Date: Tue, 27 Aug 2013 19:46:56 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r254969 - head/lib/msun/src X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 27 Aug 2013 19:46:58 -0000 Author: kargl Date: Tue Aug 27 19:46:56 2013 New Revision: 254969 URL: http://svnweb.freebsd.org/changeset/base/254969 Log: * s_erf.c: . Use integer literal constants instead of double literal constants. * s_erff.c: . Use integer literal constants instead of casting double literal constants to float. . Update the threshold values from those carried over from erf() to values appropriate for float. . New sets of polynomial coefficients for the rational approximations. These coefficients have little, but positive, effect on the maximum error in ULP in the four intervals, but do improve the overall speed of execution. . Remove redundant GET_FLOAT_WORD(ix,x) as hx already contained the contents that is packed into ix. . Update the mask that is used to zero-out lower-order bits in x in the intervals [1.25, 2.857143] and [2.857143, 12]. In tests on amd64, this change improves the maximum error in ULP from 6.27739 and 63.8095 to 3.16774 and 2.92095 on these intervals for erffc(). Reviewed by: bde Modified: head/lib/msun/src/s_erf.c head/lib/msun/src/s_erff.c Modified: head/lib/msun/src/s_erf.c ============================================================================== --- head/lib/msun/src/s_erf.c Tue Aug 27 19:10:36 2013 (r254968) +++ head/lib/msun/src/s_erf.c Tue Aug 27 19:46:56 2013 (r254969) @@ -201,7 +201,7 @@ erf(double x) if(ix < 0x3feb0000) { /* |x|<0.84375 */ if(ix < 0x3e300000) { /* |x|<2**-28 */ if (ix < 0x00800000) - return 0.125*(8.0*x+efx8*x); /*avoid underflow */ + return (8*x+efx8*x)/8; /* avoid spurious underflow */ return x + efx*x; } z = x*x; Modified: head/lib/msun/src/s_erff.c ============================================================================== --- head/lib/msun/src/s_erff.c Tue Aug 27 19:10:36 2013 (r254968) +++ head/lib/msun/src/s_erff.c Tue Aug 27 19:46:56 2013 (r254969) @@ -24,75 +24,59 @@ tiny = 1e-30, half= 5.0000000000e-01, /* 0x3F000000 */ one = 1.0000000000e+00, /* 0x3F800000 */ two = 2.0000000000e+00, /* 0x40000000 */ - /* c = (subfloat)0.84506291151 */ -erx = 8.4506291151e-01, /* 0x3f58560b */ /* - * Coefficients for approximation to erf on [0,0.84375] + * Coefficients for approximation to erf on [0,0.84375] */ efx = 1.2837916613e-01, /* 0x3e0375d4 */ efx8= 1.0270333290e+00, /* 0x3f8375d4 */ -pp0 = 1.2837916613e-01, /* 0x3e0375d4 */ -pp1 = -3.2504209876e-01, /* 0xbea66beb */ -pp2 = -2.8481749818e-02, /* 0xbce9528f */ -pp3 = -5.7702702470e-03, /* 0xbbbd1489 */ -pp4 = -2.3763017452e-05, /* 0xb7c756b1 */ -qq1 = 3.9791721106e-01, /* 0x3ecbbbce */ -qq2 = 6.5022252500e-02, /* 0x3d852a63 */ -qq3 = 5.0813062117e-03, /* 0x3ba68116 */ -qq4 = 1.3249473704e-04, /* 0x390aee49 */ -qq5 = -3.9602282413e-06, /* 0xb684e21a */ /* - * Coefficients for approximation to erf in [0.84375,1.25] + * Domain [0, 0.84375], range ~[-5.4446e-10,5.5197e-10]: + * |(erf(x) - x)/x - p(x)/q(x)| < 2**-31. */ -pa0 = -2.3621185683e-03, /* 0xbb1acdc6 */ -pa1 = 4.1485610604e-01, /* 0x3ed46805 */ -pa2 = -3.7220788002e-01, /* 0xbebe9208 */ -pa3 = 3.1834661961e-01, /* 0x3ea2fe54 */ -pa4 = -1.1089469492e-01, /* 0xbde31cc2 */ -pa5 = 3.5478305072e-02, /* 0x3d1151b3 */ -pa6 = -2.1663755178e-03, /* 0xbb0df9c0 */ -qa1 = 1.0642088205e-01, /* 0x3dd9f331 */ -qa2 = 5.4039794207e-01, /* 0x3f0a5785 */ -qa3 = 7.1828655899e-02, /* 0x3d931ae7 */ -qa4 = 1.2617121637e-01, /* 0x3e013307 */ -qa5 = 1.3637083583e-02, /* 0x3c5f6e13 */ -qa6 = 1.1984500103e-02, /* 0x3c445aa3 */ +pp0 = 1.28379166e-01F, /* 0x1.06eba8p-3 */ +pp1 = -3.36030394e-01F, /* -0x1.58185ap-2 */ +pp2 = -1.86260219e-03F, /* -0x1.e8451ep-10 */ +qq1 = 3.12324286e-01F, /* 0x1.3fd1f0p-2 */ +qq2 = 2.16070302e-02F, /* 0x1.620274p-6 */ +qq3 = -1.98859419e-03F, /* -0x1.04a626p-9 */ /* - * Coefficients for approximation to erfc in [1.25,1/0.35] + * Domain [0.84375, 1.25], range ~[-1.953e-11,1.940e-11]: + * |(erf(x) - erx) - p(x)/q(x)| < 2**-36. */ -ra0 = -9.8649440333e-03, /* 0xbc21a093 */ -ra1 = -6.9385856390e-01, /* 0xbf31a0b7 */ -ra2 = -1.0558626175e+01, /* 0xc128f022 */ -ra3 = -6.2375331879e+01, /* 0xc2798057 */ -ra4 = -1.6239666748e+02, /* 0xc322658c */ -ra5 = -1.8460508728e+02, /* 0xc3389ae7 */ -ra6 = -8.1287437439e+01, /* 0xc2a2932b */ -ra7 = -9.8143291473e+00, /* 0xc11d077e */ -sa1 = 1.9651271820e+01, /* 0x419d35ce */ -sa2 = 1.3765776062e+02, /* 0x4309a863 */ -sa3 = 4.3456588745e+02, /* 0x43d9486f */ -sa4 = 6.4538726807e+02, /* 0x442158c9 */ -sa5 = 4.2900814819e+02, /* 0x43d6810b */ -sa6 = 1.0863500214e+02, /* 0x42d9451f */ -sa7 = 6.5702495575e+00, /* 0x40d23f7c */ -sa8 = -6.0424413532e-02, /* 0xbd777f97 */ +erx = 8.42697144e-01F, /* 0x1.af7600p-1. erf(1) rounded to 16 bits. */ +pa0 = 3.64939137e-06F, /* 0x1.e9d022p-19 */ +pa1 = 4.15109694e-01F, /* 0x1.a91284p-2 */ +pa2 = -1.65179938e-01F, /* -0x1.5249dcp-3 */ +pa3 = 1.10914491e-01F, /* 0x1.c64e46p-4 */ +qa1 = 6.02074385e-01F, /* 0x1.344318p-1 */ +qa2 = 5.35934687e-01F, /* 0x1.126608p-1 */ +qa3 = 1.68576106e-01F, /* 0x1.593e6ep-3 */ +qa4 = 5.62181212e-02F, /* 0x1.cc89f2p-5 */ /* - * Coefficients for approximation to erfc in [1/.35,28] + * Domain [1.25,1/0.35], range ~[-7.043e-10,7.457e-10]: + * |log(x*erfc(x)) + x**2 + 0.5625 - r(x)/s(x)| < 2**-30 */ -rb0 = -9.8649431020e-03, /* 0xbc21a092 */ -rb1 = -7.9928326607e-01, /* 0xbf4c9dd4 */ -rb2 = -1.7757955551e+01, /* 0xc18e104b */ -rb3 = -1.6063638306e+02, /* 0xc320a2ea */ -rb4 = -6.3756646729e+02, /* 0xc41f6441 */ -rb5 = -1.0250950928e+03, /* 0xc480230b */ -rb6 = -4.8351919556e+02, /* 0xc3f1c275 */ -sb1 = 3.0338060379e+01, /* 0x41f2b459 */ -sb2 = 3.2579251099e+02, /* 0x43a2e571 */ -sb3 = 1.5367296143e+03, /* 0x44c01759 */ -sb4 = 3.1998581543e+03, /* 0x4547fdbb */ -sb5 = 2.5530502930e+03, /* 0x451f90ce */ -sb6 = 4.7452853394e+02, /* 0x43ed43a7 */ -sb7 = -2.2440952301e+01; /* 0xc1b38712 */ +ra0 = -9.87132732e-03F, /* -0x1.4376b2p-7 */ +ra1 = -5.53605914e-01F, /* -0x1.1b723cp-1 */ +ra2 = -2.17589188e+00F, /* -0x1.1683a0p+1 */ +ra3 = -1.43268085e+00F, /* -0x1.6ec42cp+0 */ +sa1 = 5.45995426e+00F, /* 0x1.5d6fe4p+2 */ +sa2 = 6.69798088e+00F, /* 0x1.acabb8p+2 */ +sa3 = 1.43113089e+00F, /* 0x1.6e5e98p+0 */ +sa4 = -5.77397496e-02F, /* -0x1.d90108p-5 */ +/* + * Domain [1/0.35, 11], range ~[-2.264e-13,2.336e-13]: + * |log(x*erfc(x)) + x**2 + 0.5625 - r(x)/s(x)| < 2**-42 + */ +rb0 = -9.86494310e-03F, /* -0x1.434124p-7 */ +rb1 = -6.25171244e-01F, /* -0x1.401672p-1 */ +rb2 = -6.16498327e+00F, /* -0x1.8a8f16p+2 */ +rb3 = -1.66696873e+01F, /* -0x1.0ab70ap+4 */ +rb4 = -9.53764343e+00F, /* -0x1.313460p+3 */ +sb1 = 1.26884899e+01F, /* 0x1.96081cp+3 */ +sb2 = 4.51839523e+01F, /* 0x1.6978bcp+5 */ +sb3 = 4.72810211e+01F, /* 0x1.7a3f88p+5 */ +sb4 = 8.93033314e+00F; /* 0x1.1dc54ap+3 */ float erff(float x) @@ -107,43 +91,37 @@ erff(float x) } if(ix < 0x3f580000) { /* |x|<0.84375 */ - if(ix < 0x31800000) { /* |x|<2**-28 */ - if (ix < 0x04000000) - /*avoid underflow */ - return (float)0.125*((float)8.0*x+efx8*x); + if(ix < 0x38800000) { /* |x|<2**-14 */ + if (ix < 0x04000000) /* |x|<0x1p-119 */ + return (8*x+efx8*x)/8; /* avoid spurious underflow */ return x + efx*x; } z = x*x; - r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4))); - s = one+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5)))); + r = pp0+z*(pp1+z*pp2); + s = one+z*(qq1+z*(qq2+z*qq3)); y = r/s; return x + x*y; } if(ix < 0x3fa00000) { /* 0.84375 <= |x| < 1.25 */ s = fabsf(x)-one; - P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6))))); - Q = one+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6))))); + P = pa0+s*(pa1+s*(pa2+s*pa3)); + Q = one+s*(qa1+s*(qa2+s*(qa3+s*qa4))); if(hx>=0) return erx + P/Q; else return -erx - P/Q; } - if (ix >= 0x40c00000) { /* inf>|x|>=6 */ + if (ix >= 0x40800000) { /* inf>|x|>=4 */ if(hx>=0) return one-tiny; else return tiny-one; } x = fabsf(x); s = one/(x*x); if(ix< 0x4036DB6E) { /* |x| < 1/0.35 */ - R=ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*( - ra5+s*(ra6+s*ra7)))))); - S=one+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*( - sa5+s*(sa6+s*(sa7+s*sa8))))))); + R=ra0+s*(ra1+s*(ra2+s*ra3)); + S=one+s*(sa1+s*(sa2+s*(sa3+s*sa4))); } else { /* |x| >= 1/0.35 */ - R=rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*( - rb5+s*rb6))))); - S=one+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*( - sb5+s*(sb6+s*sb7)))))); - } - GET_FLOAT_WORD(ix,x); - SET_FLOAT_WORD(z,ix&0xfffff000); - r = __ieee754_expf(-z*z-(float)0.5625)*__ieee754_expf((z-x)*(z+x)+R/S); + R=rb0+s*(rb1+s*(rb2+s*(rb3+s*rb4))); + S=one+s*(sb1+s*(sb2+s*(sb3+s*sb4))); + } + SET_FLOAT_WORD(z,hx&0xffffe000); + r = expf(-z*z-0.5625F)*expf((z-x)*(z+x)+R/S); if(hx>=0) return one-r/x; else return r/x-one; } @@ -160,11 +138,11 @@ erfcf(float x) } if(ix < 0x3f580000) { /* |x|<0.84375 */ - if(ix < 0x23800000) /* |x|<2**-56 */ + if(ix < 0x33800000) /* |x|<2**-24 */ return one-x; z = x*x; - r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4))); - s = one+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5)))); + r = pp0+z*(pp1+z*pp2); + s = one+z*(qq1+z*(qq2+z*qq3)); y = r/s; if(hx < 0x3e800000) { /* x<1/4 */ return one-(x+x*y); @@ -176,33 +154,27 @@ erfcf(float x) } if(ix < 0x3fa00000) { /* 0.84375 <= |x| < 1.25 */ s = fabsf(x)-one; - P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6))))); - Q = one+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6))))); + P = pa0+s*(pa1+s*(pa2+s*pa3)); + Q = one+s*(qa1+s*(qa2+s*(qa3+s*qa4))); if(hx>=0) { z = one-erx; return z - P/Q; } else { z = erx+P/Q; return one+z; } } - if (ix < 0x41e00000) { /* |x|<28 */ + if (ix < 0x41300000) { /* |x|<11 */ x = fabsf(x); s = one/(x*x); if(ix< 0x4036DB6D) { /* |x| < 1/.35 ~ 2.857143*/ - R=ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*( - ra5+s*(ra6+s*ra7)))))); - S=one+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*( - sa5+s*(sa6+s*(sa7+s*sa8))))))); + R=ra0+s*(ra1+s*(ra2+s*ra3)); + S=one+s*(sa1+s*(sa2+s*(sa3+s*sa4))); } else { /* |x| >= 1/.35 ~ 2.857143 */ - if(hx<0&&ix>=0x40c00000) return two-tiny;/* x < -6 */ - R=rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*( - rb5+s*rb6))))); - S=one+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*( - sb5+s*(sb6+s*sb7)))))); + if(hx<0&&ix>=0x40a00000) return two-tiny;/* x < -5 */ + R=rb0+s*(rb1+s*(rb2+s*(rb3+s*rb4))); + S=one+s*(sb1+s*(sb2+s*(sb3+s*sb4))); } - GET_FLOAT_WORD(ix,x); - SET_FLOAT_WORD(z,ix&0xfffff000); - r = __ieee754_expf(-z*z-(float)0.5625)* - __ieee754_expf((z-x)*(z+x)+R/S); + SET_FLOAT_WORD(z,hx&0xffffe000); + r = expf(-z*z-0.5625F)*expf((z-x)*(z+x)+R/S); if(hx>0) return r/x; else return two-r/x; } else { if(hx>0) return tiny*tiny; else return two-tiny;