From owner-svn-src-all@FreeBSD.ORG Thu Jan 12 17:55:22 2012 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id AFEBB1065672; Thu, 12 Jan 2012 17:55:22 +0000 (UTC) (envelope-from ed@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 9DB218FC16; Thu, 12 Jan 2012 17:55:22 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.4/8.14.4) with ESMTP id q0CHtMJ2020350; Thu, 12 Jan 2012 17:55:22 GMT (envelope-from ed@svn.freebsd.org) Received: (from ed@localhost) by svn.freebsd.org (8.14.4/8.14.4/Submit) id q0CHtMA2020344; Thu, 12 Jan 2012 17:55:22 GMT (envelope-from ed@svn.freebsd.org) Message-Id: <201201121755.q0CHtMA2020344@svn.freebsd.org> From: Ed Schouten Date: Thu, 12 Jan 2012 17:55:22 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r230025 - head/contrib/compiler-rt/lib/sparc64 X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 12 Jan 2012 17:55:22 -0000 Author: ed Date: Thu Jan 12 17:55:22 2012 New Revision: 230025 URL: http://svn.freebsd.org/changeset/base/230025 Log: Add SPARC64 version of div/mod written in assembly. This version is similar to the code shipped with libgcc. It is based on the code from the SPARC64 architecture manual, provided without any restrictions. Tested by: flo@ Added: head/contrib/compiler-rt/lib/sparc64/ head/contrib/compiler-rt/lib/sparc64/divmod.m4 head/contrib/compiler-rt/lib/sparc64/divsi3.S head/contrib/compiler-rt/lib/sparc64/generate.sh head/contrib/compiler-rt/lib/sparc64/modsi3.S head/contrib/compiler-rt/lib/sparc64/udivsi3.S head/contrib/compiler-rt/lib/sparc64/umodsi3.S Added: head/contrib/compiler-rt/lib/sparc64/divmod.m4 ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/contrib/compiler-rt/lib/sparc64/divmod.m4 Thu Jan 12 17:55:22 2012 (r230025) @@ -0,0 +1,250 @@ +/* + * This m4 code has been taken from The SPARC Architecture Manual Version 8. + */ + +/* + * Division/Remainder + * + * Input is: + * dividend -- the thing being divided + * divisor -- how many ways to divide it + * Important parameters: + * N -- how many bits per iteration we try to get + * as our current guess: define(N, 4) define(TWOSUPN, 16) + * WORDSIZE -- how many bits altogether we're talking about: + * obviously: define(WORDSIZE, 32) + * A derived constant: + * TOPBITS -- how many bits are in the top "decade" of a number: + * define(TOPBITS, eval( WORDSIZE - N*((WORDSIZE-1)/N) ) ) + * Important variables are: + * Q -- the partial quotient under development -- initially 0 + * R -- the remainder so far -- initially == the dividend + * ITER -- number of iterations of the main division loop which will + * be required. Equal to CEIL( lg2(quotient)/N ) + * Note that this is log_base_(2ˆN) of the quotient. + * V -- the current comparand -- initially divisor*2ˆ(ITER*N-1) + * Cost: + * current estimate for non-large dividend is + * CEIL( lg2(quotient) / N ) x ( 10 + 7N/2 ) + C + * a large dividend is one greater than 2ˆ(31-TOPBITS) and takes a + * different path, as the upper bits of the quotient must be developed + * one bit at a time. + * This uses the m4 and cpp macro preprocessors. + */ + +define(dividend, `%o0') +define(divisor,`%o1') +define(Q, `%o2') +define(R, `%o3') +define(ITER, `%o4') +define(V, `%o5') +define(SIGN, `%g3') +define(T, `%g1') +define(SC,`%g2') +/* + * This is the recursive definition of how we develop quotient digits. + * It takes three important parameters: + * $1 -- the current depth, 1<=$1<=N + * $2 -- the current accumulation of quotient bits + * N -- max depth + * We add a new bit to $2 and either recurse or insert the bits in the quotient. + * Dynamic input: + * R -- current remainder + * Q -- current quotient + * V -- current comparand + * cc -- set on current value of R + * Dynamic output: + * R', Q', V', cc' + */ + +#include "../assembly.h" + +.text + .align 4 + +define(DEVELOP_QUOTIENT_BITS, +` !depth $1, accumulated bits $2 + bl L.$1.eval(TWOSUPN+$2) + srl V,1,V + ! remainder is nonnegative + subcc R,V,R + ifelse( $1, N, + ` b 9f + add Q, ($2*2+1), Q + ',` DEVELOP_QUOTIENT_BITS( incr($1), `eval(2*$2+1)') + ') +L.$1.eval(TWOSUPN+$2): + ! remainder is negative + addcc R,V,R + ifelse( $1, N, + ` b 9f + add Q, ($2*2-1), Q + ',` DEVELOP_QUOTIENT_BITS( incr($1), `eval(2*$2-1)') + ') + ifelse( $1, 1, `9:') +') +ifelse( ANSWER, `quotient', ` +DEFINE_COMPILERRT_FUNCTION(__udivsi3) + save %sp,-64,%sp ! do this for debugging + b divide + mov 0,SIGN ! result always nonnegative +DEFINE_COMPILERRT_FUNCTION(__divsi3) + save %sp,-64,%sp ! do this for debugging + orcc divisor,dividend,%g0 ! are either dividend or divisor negative + bge divide ! if not, skip this junk + xor divisor,dividend,SIGN ! record sign of result in sign of SIGN + tst divisor + bge 2f + tst dividend + ! divisor < 0 + bge divide + neg divisor + 2: + ! dividend < 0 + neg dividend + ! FALL THROUGH +',` +DEFINE_COMPILERRT_FUNCTION(__umodsi3) + save %sp,-64,%sp ! do this for debugging + b divide + mov 0,SIGN ! result always nonnegative +DEFINE_COMPILERRT_FUNCTION(__modsi3) + save %sp,-64,%sp ! do this for debugging + orcc divisor,dividend,%g0 ! are either dividend or divisor negative + bge divide ! if not, skip this junk + mov dividend,SIGN ! record sign of result in sign of SIGN + tst divisor + bge 2f + tst dividend + ! divisor < 0 + bge divide + neg divisor + 2: + ! dividend < 0 + neg dividend + ! FALL THROUGH +') + +divide: + ! Compute size of quotient, scale comparand. + orcc divisor,%g0,V ! movcc divisor,V + te 2 ! if divisor = 0 + mov dividend,R + mov 0,Q + sethi %hi(1<<(WORDSIZE-TOPBITS-1)),T + cmp R,T + blu not_really_big + mov 0,ITER + ! + ! Here, the dividend is >= 2ˆ(31-N) or so. We must be careful here, + ! as our usual N-at-a-shot divide step will cause overflow and havoc. + ! The total number of bits in the result here is N*ITER+SC, where + ! SC <= N. + ! Compute ITER in an unorthodox manner: know we need to Shift V into +! the top decade: so don't even bother to compare to R. +1: + cmp V,T + bgeu 3f + mov 1,SC + sll V,N,V + b 1b + inc ITER +! Now compute SC +2: addcc V,V,V + bcc not_too_big + add SC,1,SC + ! We're here if the divisor overflowed when Shifting. + ! This means that R has the high-order bit set. + ! Restore V and subtract from R. + sll T,TOPBITS,T ! high order bit + srl V,1,V ! rest of V + add V,T,V + b do_single_div + dec SC +not_too_big: +3: cmp V,R + blu 2b + nop + be do_single_div + nop +! V > R: went too far: back up 1 step +! srl V,1,V +! dec SC +! do single-bit divide steps +! +! We have to be careful here. We know that R >= V, so we can do the +! first divide step without thinking. BUT, the others are conditional, +! and are only done if R >= 0. Because both R and V may have the high- +! order bit set in the first step, just falling into the regular +! division loop will mess up the first time around. +! So we unroll slightly... +do_single_div: + deccc SC + bl end_regular_divide + nop + sub R,V,R + mov 1,Q + b end_single_divloop + nop +single_divloop: + sll Q,1,Q + bl 1f + srl V,1,V + ! R >= 0 + sub R,V,R + b 2f + inc Q + 1: ! R < 0 + add R,V,R + dec Q + 2: + end_single_divloop: + deccc SC + bge single_divloop + tst R + b end_regular_divide + nop + +not_really_big: +1: + sll V,N,V + cmp V,R + bleu 1b + inccc ITER + be got_result + dec ITER +do_regular_divide: + ! Do the main division iteration + tst R + ! Fall through into divide loop +divloop: + sll Q,N,Q + DEVELOP_QUOTIENT_BITS( 1, 0 ) +end_regular_divide: + deccc ITER + bge divloop + tst R + bge got_result + nop + ! non-restoring fixup here +ifelse( ANSWER, `quotient', +` dec Q +',` add R,divisor,R +') + +got_result: + tst SIGN + bge 1f + restore + ! answer < 0 + retl ! leaf-routine return +ifelse( ANSWER, `quotient', +` neg %o2,%o0 ! quotient <- -Q +',` neg %o3,%o0 ! remainder <- -R +') +1: + retl ! leaf-routine return +ifelse( ANSWER, `quotient', +` mov %o2,%o0 ! quotient <- Q +',` mov %o3,%o0 ! remainder <- R +') Added: head/contrib/compiler-rt/lib/sparc64/divsi3.S ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/contrib/compiler-rt/lib/sparc64/divsi3.S Thu Jan 12 17:55:22 2012 (r230025) @@ -0,0 +1,333 @@ +/* + * This m4 code has been taken from The SPARC Architecture Manual Version 8. + */ +/* + * Division/Remainder + * + * Input is: + * dividend -- the thing being divided + * divisor -- how many ways to divide it + * Important parameters: + * N -- how many bits per iteration we try to get + * as our current guess: + * WORDSIZE -- how many bits altogether we're talking about: + * obviously: + * A derived constant: + * TOPBITS -- how many bits are in the top "decade" of a number: + * + * Important variables are: + * Q -- the partial quotient under development -- initially 0 + * R -- the remainder so far -- initially == the dividend + * ITER -- number of iterations of the main division loop which will + * be required. Equal to CEIL( lg2(quotient)/4 ) + * Note that this is log_base_(2ˆ4) of the quotient. + * V -- the current comparand -- initially divisor*2ˆ(ITER*4-1) + * Cost: + * current estimate for non-large dividend is + * CEIL( lg2(quotient) / 4 ) x ( 10 + 74/2 ) + C + * a large dividend is one greater than 2ˆ(31-4 ) and takes a + * different path, as the upper bits of the quotient must be developed + * one bit at a time. + * This uses the m4 and cpp macro preprocessors. + */ +/* + * This is the recursive definition of how we develop quotient digits. + * It takes three important parameters: + * $1 -- the current depth, 1<=$1<=4 + * $2 -- the current accumulation of quotient bits + * 4 -- max depth + * We add a new bit to $2 and either recurse or insert the bits in the quotient. + * Dynamic input: + * %o3 -- current remainder + * %o2 -- current quotient + * %o5 -- current comparand + * cc -- set on current value of %o3 + * Dynamic output: + * %o3', %o2', %o5', cc' + */ +#include "../assembly.h" +.text + .align 4 +DEFINE_COMPILERRT_FUNCTION(__udivsi3) + save %sp,-64,%sp ! do this for debugging + b divide + mov 0,%g3 ! result always nonnegative +DEFINE_COMPILERRT_FUNCTION(__divsi3) + save %sp,-64,%sp ! do this for debugging + orcc %o1,%o0,%g0 ! are either %o0 or %o1 negative + bge divide ! if not, skip this junk + xor %o1,%o0,%g3 ! record sign of result in sign of %g3 + tst %o1 + bge 2f + tst %o0 + ! %o1 < 0 + bge divide + neg %o1 + 2: + ! %o0 < 0 + neg %o0 + ! FALL THROUGH +divide: + ! Compute size of quotient, scale comparand. + orcc %o1,%g0,%o5 ! movcc %o1,%o5 + te 2 ! if %o1 = 0 + mov %o0,%o3 + mov 0,%o2 + sethi %hi(1<<(32-4 -1)),%g1 + cmp %o3,%g1 + blu not_really_big + mov 0,%o4 + ! + ! Here, the %o0 is >= 2ˆ(31-4) or so. We must be careful here, + ! as our usual 4-at-a-shot divide step will cause overflow and havoc. + ! The total number of bits in the result here is 4*%o4+%g2, where + ! %g2 <= 4. + ! Compute %o4 in an unorthodox manner: know we need to Shift %o5 into +! the top decade: so don't even bother to compare to %o3. +1: + cmp %o5,%g1 + bgeu 3f + mov 1,%g2 + sll %o5,4,%o5 + b 1b + inc %o4 +! Now compute %g2 +2: addcc %o5,%o5,%o5 + bcc not_too_big + add %g2,1,%g2 + ! We're here if the %o1 overflowed when Shifting. + ! This means that %o3 has the high-order bit set. + ! Restore %o5 and subtract from %o3. + sll %g1,4 ,%g1 ! high order bit + srl %o5,1,%o5 ! rest of %o5 + add %o5,%g1,%o5 + b do_single_div + dec %g2 +not_too_big: +3: cmp %o5,%o3 + blu 2b + nop + be do_single_div + nop +! %o5 > %o3: went too far: back up 1 step +! srl %o5,1,%o5 +! dec %g2 +! do single-bit divide steps +! +! We have to be careful here. We know that %o3 >= %o5, so we can do the +! first divide step without thinking. BUT, the others are conditional, +! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- +! order bit set in the first step, just falling into the regular +! division loop will mess up the first time around. +! So we unroll slightly... +do_single_div: + deccc %g2 + bl end_regular_divide + nop + sub %o3,%o5,%o3 + mov 1,%o2 + b end_single_divloop + nop +single_divloop: + sll %o2,1,%o2 + bl 1f + srl %o5,1,%o5 + ! %o3 >= 0 + sub %o3,%o5,%o3 + b 2f + inc %o2 + 1: ! %o3 < 0 + add %o3,%o5,%o3 + dec %o2 + 2: + end_single_divloop: + deccc %g2 + bge single_divloop + tst %o3 + b end_regular_divide + nop +not_really_big: +1: + sll %o5,4,%o5 + cmp %o5,%o3 + bleu 1b + inccc %o4 + be got_result + dec %o4 +do_regular_divide: + ! Do the main division iteration + tst %o3 + ! Fall through into divide loop +divloop: + sll %o2,4,%o2 + !depth 1, accumulated bits 0 + bl L.1.16 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + !depth 2, accumulated bits 1 + bl L.2.17 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + !depth 3, accumulated bits 3 + bl L.3.19 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + !depth 4, accumulated bits 7 + bl L.4.23 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (7*2+1), %o2 +L.4.23: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (7*2-1), %o2 +L.3.19: + ! remainder is negative + addcc %o3,%o5,%o3 + !depth 4, accumulated bits 5 + bl L.4.21 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (5*2+1), %o2 +L.4.21: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (5*2-1), %o2 +L.2.17: + ! remainder is negative + addcc %o3,%o5,%o3 + !depth 3, accumulated bits 1 + bl L.3.17 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + !depth 4, accumulated bits 3 + bl L.4.19 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (3*2+1), %o2 +L.4.19: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (3*2-1), %o2 +L.3.17: + ! remainder is negative + addcc %o3,%o5,%o3 + !depth 4, accumulated bits 1 + bl L.4.17 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (1*2+1), %o2 +L.4.17: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (1*2-1), %o2 +L.1.16: + ! remainder is negative + addcc %o3,%o5,%o3 + !depth 2, accumulated bits -1 + bl L.2.15 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + !depth 3, accumulated bits -1 + bl L.3.15 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + !depth 4, accumulated bits -1 + bl L.4.15 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2+1), %o2 +L.4.15: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2-1), %o2 +L.3.15: + ! remainder is negative + addcc %o3,%o5,%o3 + !depth 4, accumulated bits -3 + bl L.4.13 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2+1), %o2 +L.4.13: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2-1), %o2 +L.2.15: + ! remainder is negative + addcc %o3,%o5,%o3 + !depth 3, accumulated bits -3 + bl L.3.13 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + !depth 4, accumulated bits -5 + bl L.4.11 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2+1), %o2 +L.4.11: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2-1), %o2 +L.3.13: + ! remainder is negative + addcc %o3,%o5,%o3 + !depth 4, accumulated bits -7 + bl L.4.9 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2+1), %o2 +L.4.9: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2-1), %o2 + 9: +end_regular_divide: + deccc %o4 + bge divloop + tst %o3 + bge got_result + nop + ! non-restoring fixup here + dec %o2 +got_result: + tst %g3 + bge 1f + restore + ! answer < 0 + retl ! leaf-routine return + neg %o2,%o0 ! quotient <- -%o2 +1: + retl ! leaf-routine return + mov %o2,%o0 ! quotient <- %o2 Added: head/contrib/compiler-rt/lib/sparc64/generate.sh ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/contrib/compiler-rt/lib/sparc64/generate.sh Thu Jan 12 17:55:22 2012 (r230025) @@ -0,0 +1,6 @@ +#!/bin/sh + +m4 divmod.m4 | sed -e 's/[[:space:]]*$//' | grep -v '^$' > modsi3.S +m4 -DANSWER=quotient divmod.m4 | sed -e 's/[[:space:]]*$//' | grep -v '^$' > divsi3.S +echo '! This file intentionally left blank' > umodsi3.S +echo '! This file intentionally left blank' > udivsi3.S Added: head/contrib/compiler-rt/lib/sparc64/modsi3.S ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/contrib/compiler-rt/lib/sparc64/modsi3.S Thu Jan 12 17:55:22 2012 (r230025) @@ -0,0 +1,333 @@ +/* + * This m4 code has been taken from The SPARC Architecture Manual Version 8. + */ +/* + * Division/Remainder + * + * Input is: + * dividend -- the thing being divided + * divisor -- how many ways to divide it + * Important parameters: + * N -- how many bits per iteration we try to get + * as our current guess: + * WORDSIZE -- how many bits altogether we're talking about: + * obviously: + * A derived constant: + * TOPBITS -- how many bits are in the top "decade" of a number: + * + * Important variables are: + * Q -- the partial quotient under development -- initially 0 + * R -- the remainder so far -- initially == the dividend + * ITER -- number of iterations of the main division loop which will + * be required. Equal to CEIL( lg2(quotient)/4 ) + * Note that this is log_base_(2ˆ4) of the quotient. + * V -- the current comparand -- initially divisor*2ˆ(ITER*4-1) + * Cost: + * current estimate for non-large dividend is + * CEIL( lg2(quotient) / 4 ) x ( 10 + 74/2 ) + C + * a large dividend is one greater than 2ˆ(31-4 ) and takes a + * different path, as the upper bits of the quotient must be developed + * one bit at a time. + * This uses the m4 and cpp macro preprocessors. + */ +/* + * This is the recursive definition of how we develop quotient digits. + * It takes three important parameters: + * $1 -- the current depth, 1<=$1<=4 + * $2 -- the current accumulation of quotient bits + * 4 -- max depth + * We add a new bit to $2 and either recurse or insert the bits in the quotient. + * Dynamic input: + * %o3 -- current remainder + * %o2 -- current quotient + * %o5 -- current comparand + * cc -- set on current value of %o3 + * Dynamic output: + * %o3', %o2', %o5', cc' + */ +#include "../assembly.h" +.text + .align 4 +DEFINE_COMPILERRT_FUNCTION(__umodsi3) + save %sp,-64,%sp ! do this for debugging + b divide + mov 0,%g3 ! result always nonnegative +DEFINE_COMPILERRT_FUNCTION(__modsi3) + save %sp,-64,%sp ! do this for debugging + orcc %o1,%o0,%g0 ! are either %o0 or %o1 negative + bge divide ! if not, skip this junk + mov %o0,%g3 ! record sign of result in sign of %g3 + tst %o1 + bge 2f + tst %o0 + ! %o1 < 0 + bge divide + neg %o1 + 2: + ! %o0 < 0 + neg %o0 + ! FALL THROUGH +divide: + ! Compute size of quotient, scale comparand. + orcc %o1,%g0,%o5 ! movcc %o1,%o5 + te 2 ! if %o1 = 0 + mov %o0,%o3 + mov 0,%o2 + sethi %hi(1<<(32-4 -1)),%g1 + cmp %o3,%g1 + blu not_really_big + mov 0,%o4 + ! + ! Here, the %o0 is >= 2ˆ(31-4) or so. We must be careful here, + ! as our usual 4-at-a-shot divide step will cause overflow and havoc. + ! The total number of bits in the result here is 4*%o4+%g2, where + ! %g2 <= 4. + ! Compute %o4 in an unorthodox manner: know we need to Shift %o5 into +! the top decade: so don't even bother to compare to %o3. +1: + cmp %o5,%g1 + bgeu 3f + mov 1,%g2 + sll %o5,4,%o5 + b 1b + inc %o4 +! Now compute %g2 +2: addcc %o5,%o5,%o5 + bcc not_too_big + add %g2,1,%g2 + ! We're here if the %o1 overflowed when Shifting. + ! This means that %o3 has the high-order bit set. + ! Restore %o5 and subtract from %o3. + sll %g1,4 ,%g1 ! high order bit + srl %o5,1,%o5 ! rest of %o5 + add %o5,%g1,%o5 + b do_single_div + dec %g2 +not_too_big: +3: cmp %o5,%o3 + blu 2b + nop + be do_single_div + nop +! %o5 > %o3: went too far: back up 1 step +! srl %o5,1,%o5 +! dec %g2 +! do single-bit divide steps +! +! We have to be careful here. We know that %o3 >= %o5, so we can do the +! first divide step without thinking. BUT, the others are conditional, +! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- +! order bit set in the first step, just falling into the regular +! division loop will mess up the first time around. +! So we unroll slightly... +do_single_div: + deccc %g2 + bl end_regular_divide + nop + sub %o3,%o5,%o3 + mov 1,%o2 + b end_single_divloop + nop +single_divloop: + sll %o2,1,%o2 + bl 1f + srl %o5,1,%o5 + ! %o3 >= 0 + sub %o3,%o5,%o3 + b 2f + inc %o2 + 1: ! %o3 < 0 + add %o3,%o5,%o3 + dec %o2 + 2: + end_single_divloop: + deccc %g2 + bge single_divloop + tst %o3 + b end_regular_divide + nop +not_really_big: +1: + sll %o5,4,%o5 + cmp %o5,%o3 + bleu 1b + inccc %o4 + be got_result + dec %o4 +do_regular_divide: + ! Do the main division iteration + tst %o3 + ! Fall through into divide loop +divloop: + sll %o2,4,%o2 + !depth 1, accumulated bits 0 + bl L.1.16 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + !depth 2, accumulated bits 1 + bl L.2.17 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + !depth 3, accumulated bits 3 + bl L.3.19 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + !depth 4, accumulated bits 7 + bl L.4.23 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (7*2+1), %o2 +L.4.23: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (7*2-1), %o2 +L.3.19: + ! remainder is negative + addcc %o3,%o5,%o3 + !depth 4, accumulated bits 5 + bl L.4.21 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (5*2+1), %o2 +L.4.21: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (5*2-1), %o2 +L.2.17: + ! remainder is negative + addcc %o3,%o5,%o3 + !depth 3, accumulated bits 1 + bl L.3.17 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + !depth 4, accumulated bits 3 + bl L.4.19 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (3*2+1), %o2 +L.4.19: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (3*2-1), %o2 +L.3.17: + ! remainder is negative + addcc %o3,%o5,%o3 + !depth 4, accumulated bits 1 + bl L.4.17 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (1*2+1), %o2 +L.4.17: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (1*2-1), %o2 +L.1.16: + ! remainder is negative + addcc %o3,%o5,%o3 + !depth 2, accumulated bits -1 + bl L.2.15 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + !depth 3, accumulated bits -1 + bl L.3.15 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + !depth 4, accumulated bits -1 + bl L.4.15 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2+1), %o2 +L.4.15: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2-1), %o2 +L.3.15: + ! remainder is negative + addcc %o3,%o5,%o3 + !depth 4, accumulated bits -3 + bl L.4.13 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2+1), %o2 +L.4.13: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2-1), %o2 +L.2.15: + ! remainder is negative + addcc %o3,%o5,%o3 + !depth 3, accumulated bits -3 + bl L.3.13 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + !depth 4, accumulated bits -5 + bl L.4.11 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2+1), %o2 +L.4.11: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2-1), %o2 +L.3.13: + ! remainder is negative + addcc %o3,%o5,%o3 + !depth 4, accumulated bits -7 + bl L.4.9 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2+1), %o2 +L.4.9: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2-1), %o2 + 9: +end_regular_divide: + deccc %o4 + bge divloop + tst %o3 + bge got_result + nop + ! non-restoring fixup here + add %o3,%o1,%o3 +got_result: + tst %g3 + bge 1f + restore + ! answer < 0 + retl ! leaf-routine return + neg %o3,%o0 ! remainder <- -%o3 +1: + retl ! leaf-routine return + mov %o3,%o0 ! remainder <- %o3 Added: head/contrib/compiler-rt/lib/sparc64/udivsi3.S ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/contrib/compiler-rt/lib/sparc64/udivsi3.S Thu Jan 12 17:55:22 2012 (r230025) @@ -0,0 +1 @@ +! This file intentionally left blank Added: head/contrib/compiler-rt/lib/sparc64/umodsi3.S ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/contrib/compiler-rt/lib/sparc64/umodsi3.S Thu Jan 12 17:55:22 2012 (r230025) @@ -0,0 +1 @@ +! This file intentionally left blank