Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 13 May 2017 15:08:26 +0200
From:      Dimitry Andric <dimitry@andric.com>
To:        sgk@troutmask.apl.washington.edu
Cc:        Bruce Evans <brde@optusnet.com.au>, freebsd-hackers@freebsd.org, numerics@freebsd.org
Subject:   Re: catrig[fl].c and inexact
Message-ID:  <F5F8736B-D7E1-48AD-BC6C-8C74AF0A3272@andric.com>
In-Reply-To: <20170513060803.GA84399@troutmask.apl.washington.edu>
References:  <20170512215654.GA82545@troutmask.apl.washington.edu> <20170513103208.M845@besplex.bde.org> <20170513060803.GA84399@troutmask.apl.washington.edu>

next in thread | previous in thread | raw e-mail | index | archive | help

--Apple-Mail=_4FBC88C3-4C7E-4D97-8BD0-773DBE95BCD3
Content-Transfer-Encoding: quoted-printable
Content-Type: text/plain;
	charset=us-ascii

On 13 May 2017, at 08:08, Steve Kargl <sgk@troutmask.apl.washington.edu> wrote:
>
> On Sat, May 13, 2017 at 11:35:49AM +1000, Bruce Evans wrote:
>> On Fri, 12 May 2017, Steve Kargl wrote:
...
>> required for the standard magic.  I planned to fix all this magic using
>> macros like raise_inexact().
>
> If you plan to fix the magic with raise_inexact, then please
> test with a suite of compilers.  AFAICT, clang is optimizing
> out the code.  I haven't written a testcase to demonstrate this
> as I have other irons in the fire.

Using the full catrig.c and -O3, I tried gcc 4.2.1, 4.7.4, 4.8.5, 4.9.4,
5.4.0, 6.3.0 and 7.0.1, in addition to clang 3.4.1, 3.8.0, 3.9.1, 4.0.0
and 5.0.0.  All versions of gcc produced something similar to the
following for i386:

# /usr/src/lib/msun/src/catrig.c:314:   if (x == 0 && y == 0)
        .loc 1 314 0
        fldz
        fucom   %st(3)  #
        fnstsw  %ax     # tmp262
        sahf
        setne   %al     #, tmp270
        setnp   %dl     #, tmp259
        subl    $1, %eax        #, tmp270
        testb   %al, %dl        # tmp270, tmp259
        je      .L176   #,
        fucomp  %st(1)  #
        fnstsw  %ax     # tmp281
        sahf
        setne   %al     #, tmp289
        setnp   %dl     #, tmp278
        subl    $1, %eax        #, tmp289
        testb   %al, %dl        # tmp289, tmp278
        je      .L37    #,
        fstp    %st(3)  #
        fstp    %st(0)  #
        jmp     .L153   #
[...]
.L176:
        fstp    %st(0)  #
.L37:
.LBB25:
# /usr/src/lib/msun/src/catrig.c:318:   raise_inexact();
        flds    tiny    # tiny
        fadds   .LC2    #
        fstps   120(%esp)       # junk

and for amd64:

# /usr/src/lib/msun/src/catrig.c:314:   if (x == 0 && y == 0)
        .loc 1 314 0
        pxor    %xmm7, %xmm7    # tmp386
        ucomisd %xmm7, %xmm3    # tmp386, z
        setnp   %dl     #, tmp258
        cmovne  %eax, %edx      # tmp258,, tmp207, tmp254
        testb   %dl, %dl        # tmp254
        je      .L34    #,
        ucomisd %xmm7, %xmm1    # tmp386, z
        setnp   %dl     #, tmp266
        cmove   %edx, %eax      # tmp266,, tmp262
        testb   %al, %al        # tmp262
        je      .L34    #,
[...]
.L34:
.LBB33:
# /usr/src/lib/msun/src/catrig.c:318:   raise_inexact();
        movss   tiny(%rip), %xmm0       # tiny, tiny.0_28
        addss   .LC13(%rip), %xmm0      #, _29
        movss   %xmm0, 188(%rsp)        # _29, junk

All versions of clang produced something similar to the following for
i386:

        .loc    1 314 8 is_stmt 1       # /usr/src/lib/msun/src/catrig.c:314:8
        fldz
        .loc    1 314 13 is_stmt 0      # /usr/src/lib/msun/src/catrig.c:314:13
        fxch    %st(1)
        fucom   %st(1)
        fnstsw  %ax
        sahf
        jne     .LBB0_19
        jp      .LBB0_19
        .loc    1 0 13                  # /usr/src/lib/msun/src/catrig.c:0:13
        fxch    %st(3)
        fucom   %st(1)
        fstp    %st(1)
        fnstsw  %ax
        sahf
        fldz
        fxch    %st(1)
        fxch    %st(3)
        jne     .LBB0_19
        jp      .LBB0_19
[...]
.LBB0_19:                               # %do.body
        .loc    1 0 8 is_stmt 0         # /usr/src/lib/msun/src/catrig.c:0:8
        fstp    %st(1)
        .loc    1 318 2 is_stmt 1       # /usr/src/lib/msun/src/catrig.c:318:2
        fld1
        fadds   tiny
        fstps   168(%esp)

and for amd64:

        .loc    1 314 8 is_stmt 1       # /usr/src/lib/msun/src/catrig.c:314:8
        pxor    %xmm2, %xmm2
        .loc    1 314 13 is_stmt 0      # /usr/src/lib/msun/src/catrig.c:314:13
        ucomisd %xmm2, %xmm4
        jne     .LBB0_15
        jp      .LBB0_15
        .loc    1 0 13                  # /usr/src/lib/msun/src/catrig.c:0:13
        ucomisd %xmm2, %xmm3
        jne     .LBB0_15
        jnp     .LBB0_21
.LBB0_15:                               # %do.body
        .loc    1 318 2 is_stmt 1       # /usr/src/lib/msun/src/catrig.c:318:2
        movss   tiny(%rip), %xmm2       # xmm2 = mem[0],zero,zero,zero
        addss   .LCPI0_2(%rip), %xmm2
.Ltmp11:
        movss   %xmm2, -16(%rbp)

I.e., these all look good, at least with regard to not optimizing out
the desired addition.

The only compiler I could find that does optimize everything away (at
least in the simplified test case), is the Intel compiler:

https://godbolt.org/g/g1UT2m

-Dimitry


--Apple-Mail=_4FBC88C3-4C7E-4D97-8BD0-773DBE95BCD3
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment;
	filename=signature.asc
Content-Type: application/pgp-signature;
	name=signature.asc
Content-Description: Message signed with OpenPGP

-----BEGIN PGP SIGNATURE-----
Version: GnuPG/MacGPG2 v2.0.30

iEYEARECAAYFAlkXBVEACgkQsF6jCi4glqP6KQCg2xk6WB11svnu92R6Rr2NtmO5
9TIAoK00DaX+gGpjflMpSreyQ5iVCdy0
=FHkh
-----END PGP SIGNATURE-----

--Apple-Mail=_4FBC88C3-4C7E-4D97-8BD0-773DBE95BCD3--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?F5F8736B-D7E1-48AD-BC6C-8C74AF0A3272>