software/libbase: upgrade softfloat to version 2b + add support for more precision

This commit is contained in:
Sebastien Bourdeauducq 2012-05-24 23:21:18 +02:00
parent 22f7d1716e
commit 97b77945e5
5 changed files with 3268 additions and 1930 deletions

View file

@ -1,9 +1,8 @@
/* /*============================================================================
===============================================================================
This C header file is part of the SoftFloat IEC/IEEE Floating-point This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
Arithmetic Package, Release 2. Package, Release 2b.
Written by John R. Hauser. This work was made possible in part by the Written by John R. Hauser. This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center International Computer Science Institute, located at Suite 600, 1947 Center
@ -12,54 +11,48 @@ National Science Foundation under grant MIP-9311980. The original version
of this code was written as part of a project to build a fixed-point vector of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley, processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek. More information overseen by Profs. Nelson Morgan and John Wawrzynek. More information
is available through the Web page `http://http.cs.berkeley.edu/~jhauser/ is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
arithmetic/softfloat.html'. arithmetic/SoftFloat.html'.
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
Derivative works are acceptable, even for commercial purposes, so long as Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they (1) the source code for the derivative work includes prominent notice that
include prominent notice akin to these three paragraphs for those parts of the work is derivative, and (2) the source code includes prominent notice with
this code that are retained. these four paragraphs for those parts of this code that are retained.
=============================================================================== =============================================================================*/
*/
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | Include common integer types and flags.
Common integer types and flags. *----------------------------------------------------------------------------*/
-------------------------------------------------------------------------------
*/
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | One of the macros `BIGENDIAN' or `LITTLEENDIAN' must be defined.
One of the macros `BIGENDIAN' or `LITTLEENDIAN' must be defined. *----------------------------------------------------------------------------*/
-------------------------------------------------------------------------------
*/
#define BIGENDIAN #define BIGENDIAN
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | The macro `BITS64' can be defined to indicate that 64-bit integer types are
The macro `BITS64' can be defined to indicate that 64-bit integer types are | supported by the compiler.
supported by the compiler. *----------------------------------------------------------------------------*/
-------------------------------------------------------------------------------
*/
//#define BITS64 //#define BITS64
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | Each of the following `typedef's defines the most convenient type that holds
Each of the following `typedef's defines the most convenient type that holds | integers of at least as many bits as specified. For example, `uint8' should
integers of at least as many bits as specified. For example, `uint8' should | be the most convenient type that can hold unsigned integers of as many as
be the most convenient type that can hold unsigned integers of as many as | 8 bits. The `flag' type must be able to hold either a 0 or 1. For most
8 bits. The `flag' type must be able to hold either a 0 or 1. For most | implementations of C, `flag', `uint8', and `int8' should all be `typedef'ed
implementations of C, `flag', `uint8', and `int8' should all be `typedef'ed | to the same as `int'.
to the same as `int'. *----------------------------------------------------------------------------*/
-------------------------------------------------------------------------------
*/
typedef int flag; typedef int flag;
typedef int uint8; typedef int uint8;
typedef int int8; typedef int int8;
@ -68,18 +61,16 @@ typedef int int16;
typedef unsigned int uint32; typedef unsigned int uint32;
typedef signed int int32; typedef signed int int32;
#ifdef BITS64 #ifdef BITS64
typedef unsigned long long int bits64; typedef unsigned long long int uint64;
typedef signed long long int sbits64; typedef signed long long int int64;
#endif #endif
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | Each of the following `typedef's defines a type that holds integers
Each of the following `typedef's defines a type that holds integers | of _exactly_ the number of bits specified. For instance, for most
of _exactly_ the number of bits specified. For instance, for most | implementations of C, `bits16' and `sbits16' should be `typedef'ed to
implementations of C, `bits16' and `sbits16' should be `typedef'ed to | `unsigned short int' and `signed short int' (or `short int'), respectively.
`unsigned short int' and `signed short int' (or `short int'), respectively. *----------------------------------------------------------------------------*/
-------------------------------------------------------------------------------
*/
typedef unsigned char bits8; typedef unsigned char bits8;
typedef signed char sbits8; typedef signed char sbits8;
typedef unsigned short int bits16; typedef unsigned short int bits16;
@ -87,38 +78,33 @@ typedef signed short int sbits16;
typedef unsigned int bits32; typedef unsigned int bits32;
typedef signed int sbits32; typedef signed int sbits32;
#ifdef BITS64 #ifdef BITS64
typedef unsigned long long int uint64; typedef unsigned long long int bits64;
typedef signed long long int int64; typedef signed long long int sbits64;
#endif #endif
#ifdef BITS64 #ifdef BITS64
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | The `LIT64' macro takes as its argument a textual integer literal and
The `LIT64' macro takes as its argument a textual integer literal and if | if necessary ``marks'' the literal as having a 64-bit integer type.
necessary ``marks'' the literal as having a 64-bit integer type. For | For example, the GNU C Compiler (`gcc') requires that 64-bit literals be
example, the Gnu C Compiler (`gcc') requires that 64-bit literals be | appended with the letters `LL' standing for `long long', which is `gcc's
appended with the letters `LL' standing for `long long', which is `gcc's | name for the 64-bit integer type. Some compilers may allow `LIT64' to be
name for the 64-bit integer type. Some compilers may allow `LIT64' to be | defined as the identity macro: `#define LIT64( a ) a'.
defined as the identity macro: `#define LIT64( a ) a'. *----------------------------------------------------------------------------*/
-------------------------------------------------------------------------------
*/
#define LIT64( a ) a##LL #define LIT64( a ) a##LL
#endif #endif
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | The macro `INLINE' can be used before functions that should be inlined. If
The macro `INLINE' can be used before functions that should be inlined. If | a compiler does not support explicit inlining, this macro should be defined
a compiler does not support explicit inlining, this macro should be defined | to be `static'.
to be `static'. *----------------------------------------------------------------------------*/
-------------------------------------------------------------------------------
*/
#define INLINE extern inline #define INLINE extern inline
/*
------------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Symbolic Boolean literals. | Symbolic Boolean literals.
------------------------------------------------------------------------------- *----------------------------------------------------------------------------*/
*/
enum { enum {
FALSE = 0, FALSE = 0,
TRUE = 1 TRUE = 1

View file

@ -1,9 +1,8 @@
/* /*============================================================================
===============================================================================
This C source fragment is part of the SoftFloat IEC/IEEE Floating-point This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2. Arithmetic Package, Release 2b.
Written by John R. Hauser. This work was made possible in part by the Written by John R. Hauser. This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center International Computer Science Institute, located at Suite 600, 1947 Center
@ -12,33 +11,34 @@ National Science Foundation under grant MIP-9311980. The original version
of this code was written as part of a project to build a fixed-point vector of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley, processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek. More information overseen by Profs. Nelson Morgan and John Wawrzynek. More information
is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
arithmetic/softfloat.html'. arithmetic/SoftFloat.html'.
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
Derivative works are acceptable, even for commercial purposes, so long as Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they (1) the source code for the derivative work includes prominent notice that
include prominent notice akin to these three paragraphs for those parts of the work is derivative, and (2) the source code includes prominent notice with
this code that are retained. these four paragraphs for those parts of this code that are retained.
=============================================================================== =============================================================================*/
*/
/*----------------------------------------------------------------------------
| Shifts `a' right by the number of bits given in `count'. If any nonzero
| bits are shifted off, they are ``jammed'' into the least significant bit of
| the result by setting the least significant bit to 1. The value of `count'
| can be arbitrarily large; in particular, if `count' is greater than 32, the
| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
| The result is stored in the location pointed to by `zPtr'.
*----------------------------------------------------------------------------*/
/*
-------------------------------------------------------------------------------
Shifts `a' right by the number of bits given in `count'. If any nonzero
bits are shifted off, they are ``jammed'' into the least significant bit of
the result by setting the least significant bit to 1. The value of `count'
can be arbitrarily large; in particular, if `count' is greater than 32, the
result will be either 0 or 1, depending on whether `a' is zero or nonzero.
The result is stored in the location pointed to by `zPtr'.
-------------------------------------------------------------------------------
*/
INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr ) INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
{ {
bits32 z; bits32 z;
@ -56,15 +56,14 @@ INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
} }
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the
Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the | number of bits given in `count'. Any bits shifted off are lost. The value
number of bits given in `count'. Any bits shifted off are lost. The value | of `count' can be arbitrarily large; in particular, if `count' is greater
of `count' can be arbitrarily large; in particular, if `count' is greater | than 64, the result will be 0. The result is broken into two 32-bit pieces
than 64, the result will be 0. The result is broken into two 32-bit pieces | which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. *----------------------------------------------------------------------------*/
-------------------------------------------------------------------------------
*/
INLINE void INLINE void
shift64Right( shift64Right(
bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr ) bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr )
@ -89,18 +88,17 @@ INLINE void
} }
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the
Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the | number of bits given in `count'. If any nonzero bits are shifted off, they
number of bits given in `count'. If any nonzero bits are shifted off, they | are ``jammed'' into the least significant bit of the result by setting the
are ``jammed'' into the least significant bit of the result by setting the | least significant bit to 1. The value of `count' can be arbitrarily large;
least significant bit to 1. The value of `count' can be arbitrarily large; | in particular, if `count' is greater than 64, the result will be either 0
in particular, if `count' is greater than 64, the result will be either 0 | or 1, depending on whether the concatenation of `a0' and `a1' is zero or
or 1, depending on whether the concatenation of `a0' and `a1' is zero or | nonzero. The result is broken into two 32-bit pieces which are stored at
nonzero. The result is broken into two 32-bit pieces which are stored at | the locations pointed to by `z0Ptr' and `z1Ptr'.
the locations pointed to by `z0Ptr' and `z1Ptr'. *----------------------------------------------------------------------------*/
-------------------------------------------------------------------------------
*/
INLINE void INLINE void
shift64RightJamming( shift64RightJamming(
bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr ) bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr )
@ -133,26 +131,25 @@ INLINE void
} }
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' right
Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' right | by 32 _plus_ the number of bits given in `count'. The shifted result is
by 32 _plus_ the number of bits given in `count'. The shifted result is | at most 64 nonzero bits; these are broken into two 32-bit pieces which are
at most 64 nonzero bits; these are broken into two 32-bit pieces which are | stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted
stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted | off form a third 32-bit result as follows: The _last_ bit shifted off is
off form a third 32-bit result as follows: The _last_ bit shifted off is | the most-significant bit of the extra result, and the other 31 bits of the
the most-significant bit of the extra result, and the other 31 bits of the | extra result are all zero if and only if _all_but_the_last_ bits shifted off
extra result are all zero if and only if _all_but_the_last_ bits shifted off | were all zero. This extra result is stored in the location pointed to by
were all zero. This extra result is stored in the location pointed to by | `z2Ptr'. The value of `count' can be arbitrarily large.
`z2Ptr'. The value of `count' can be arbitrarily large. | (This routine makes more sense if `a0', `a1', and `a2' are considered
(This routine makes more sense if `a0', `a1', and `a2' are considered | to form a fixed-point value with binary point between `a1' and `a2'. This
to form a fixed-point value with binary point between `a1' and `a2'. This | fixed-point value is shifted right by the number of bits given in `count',
fixed-point value is shifted right by the number of bits given in `count', | and the integer part of the result is returned at the locations pointed to
and the integer part of the result is returned at the locations pointed to | by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly | corrupted as described above, and is returned at the location pointed to by
corrupted as described above, and is returned at the location pointed to by | `z2Ptr'.)
`z2Ptr'.) *----------------------------------------------------------------------------*/
-------------------------------------------------------------------------------
*/
INLINE void INLINE void
shift64ExtraRightJamming( shift64ExtraRightJamming(
bits32 a0, bits32 a0,
@ -204,14 +201,13 @@ INLINE void
} }
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | Shifts the 64-bit value formed by concatenating `a0' and `a1' left by the
Shifts the 64-bit value formed by concatenating `a0' and `a1' left by the | number of bits given in `count'. Any bits shifted off are lost. The value
number of bits given in `count'. Any bits shifted off are lost. The value | of `count' must be less than 32. The result is broken into two 32-bit
of `count' must be less than 32. The result is broken into two 32-bit | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. *----------------------------------------------------------------------------*/
-------------------------------------------------------------------------------
*/
INLINE void INLINE void
shortShift64Left( shortShift64Left(
bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr ) bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr )
@ -223,15 +219,14 @@ INLINE void
} }
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' left
Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' left by | by the number of bits given in `count'. Any bits shifted off are lost.
the number of bits given in `count'. Any bits shifted off are lost. The | The value of `count' must be less than 32. The result is broken into three
value of `count' must be less than 32. The result is broken into three | 32-bit pieces which are stored at the locations pointed to by `z0Ptr',
32-bit pieces which are stored at the locations pointed to by `z0Ptr', | `z1Ptr', and `z2Ptr'.
`z1Ptr', and `z2Ptr'. *----------------------------------------------------------------------------*/
-------------------------------------------------------------------------------
*/
INLINE void INLINE void
shortShift96Left( shortShift96Left(
bits32 a0, bits32 a0,
@ -260,14 +255,13 @@ INLINE void
} }
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | Adds the 64-bit value formed by concatenating `a0' and `a1' to the 64-bit
Adds the 64-bit value formed by concatenating `a0' and `a1' to the 64-bit | value formed by concatenating `b0' and `b1'. Addition is modulo 2^64, so
value formed by concatenating `b0' and `b1'. Addition is modulo 2^64, so | any carry out is lost. The result is broken into two 32-bit pieces which
any carry out is lost. The result is broken into two 32-bit pieces which | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. *----------------------------------------------------------------------------*/
-------------------------------------------------------------------------------
*/
INLINE void INLINE void
add64( add64(
bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr ) bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr )
@ -280,15 +274,14 @@ INLINE void
} }
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | Adds the 96-bit value formed by concatenating `a0', `a1', and `a2' to the
Adds the 96-bit value formed by concatenating `a0', `a1', and `a2' to the | 96-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
96-bit value formed by concatenating `b0', `b1', and `b2'. Addition is | modulo 2^96, so any carry out is lost. The result is broken into three
modulo 2^96, so any carry out is lost. The result is broken into three | 32-bit pieces which are stored at the locations pointed to by `z0Ptr',
32-bit pieces which are stored at the locations pointed to by `z0Ptr', | `z1Ptr', and `z2Ptr'.
`z1Ptr', and `z2Ptr'. *----------------------------------------------------------------------------*/
-------------------------------------------------------------------------------
*/
INLINE void INLINE void
add96( add96(
bits32 a0, bits32 a0,
@ -319,15 +312,14 @@ INLINE void
} }
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | Subtracts the 64-bit value formed by concatenating `b0' and `b1' from the
Subtracts the 64-bit value formed by concatenating `b0' and `b1' from the | 64-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
64-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo | 2^64, so any borrow out (carry out) is lost. The result is broken into two
2^64, so any borrow out (carry out) is lost. The result is broken into two | 32-bit pieces which are stored at the locations pointed to by `z0Ptr' and
32-bit pieces which are stored at the locations pointed to by `z0Ptr' and | `z1Ptr'.
`z1Ptr'. *----------------------------------------------------------------------------*/
-------------------------------------------------------------------------------
*/
INLINE void INLINE void
sub64( sub64(
bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr ) bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr )
@ -338,15 +330,14 @@ INLINE void
} }
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | Subtracts the 96-bit value formed by concatenating `b0', `b1', and `b2' from
Subtracts the 96-bit value formed by concatenating `b0', `b1', and `b2' from | the 96-bit value formed by concatenating `a0', `a1', and `a2'. Subtraction
the 96-bit value formed by concatenating `a0', `a1', and `a2'. Subtraction | is modulo 2^96, so any borrow out (carry out) is lost. The result is broken
is modulo 2^96, so any borrow out (carry out) is lost. The result is broken | into three 32-bit pieces which are stored at the locations pointed to by
into three 32-bit pieces which are stored at the locations pointed to by | `z0Ptr', `z1Ptr', and `z2Ptr'.
`z0Ptr', `z1Ptr', and `z2Ptr'. *----------------------------------------------------------------------------*/
-------------------------------------------------------------------------------
*/
INLINE void INLINE void
sub96( sub96(
bits32 a0, bits32 a0,
@ -377,13 +368,12 @@ INLINE void
} }
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | Multiplies `a' by `b' to obtain a 64-bit product. The product is broken
Multiplies `a' by `b' to obtain a 64-bit product. The product is broken | into two 32-bit pieces which are stored at the locations pointed to by
into two 32-bit pieces which are stored at the locations pointed to by | `z0Ptr' and `z1Ptr'.
`z0Ptr' and `z1Ptr'. *----------------------------------------------------------------------------*/
-------------------------------------------------------------------------------
*/
INLINE void mul32To64( bits32 a, bits32 b, bits32 *z0Ptr, bits32 *z1Ptr ) INLINE void mul32To64( bits32 a, bits32 b, bits32 *z0Ptr, bits32 *z1Ptr )
{ {
bits16 aHigh, aLow, bHigh, bLow; bits16 aHigh, aLow, bHigh, bLow;
@ -407,14 +397,13 @@ INLINE void mul32To64( bits32 a, bits32 b, bits32 *z0Ptr, bits32 *z1Ptr )
} }
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | Multiplies the 64-bit value formed by concatenating `a0' and `a1' by `b'
Multiplies the 64-bit value formed by concatenating `a0' and `a1' by `b' to | to obtain a 96-bit product. The product is broken into three 32-bit pieces
obtain a 96-bit product. The product is broken into three 32-bit pieces | which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and | `z2Ptr'.
`z2Ptr'. *----------------------------------------------------------------------------*/
-------------------------------------------------------------------------------
*/
INLINE void INLINE void
mul64By32To96( mul64By32To96(
bits32 a0, bits32 a0,
@ -436,14 +425,13 @@ INLINE void
} }
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | Multiplies the 64-bit value formed by concatenating `a0' and `a1' by the
Multiplies the 64-bit value formed by concatenating `a0' and `a1' by the | 64-bit value formed by concatenating `b0' and `b1' to obtain a 128-bit
64-bit value formed by concatenating `b0' and `b1' to obtain a 128-bit | product. The product is broken into four 32-bit pieces which are stored at
product. The product is broken into four 32-bit pieces which are stored at | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'. *----------------------------------------------------------------------------*/
-------------------------------------------------------------------------------
*/
INLINE void INLINE void
mul64To128( mul64To128(
bits32 a0, bits32 a0,
@ -474,16 +462,15 @@ INLINE void
} }
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | Returns an approximation to the 32-bit integer quotient obtained by dividing
Returns an approximation to the 32-bit integer quotient obtained by dividing | `b' into the 64-bit value formed by concatenating `a0' and `a1'. The
`b' into the 64-bit value formed by concatenating `a0' and `a1'. The divisor | divisor `b' must be at least 2^31. If q is the exact quotient truncated
`b' must be at least 2^31. If q is the exact quotient truncated toward | toward zero, the approximation returned lies between q and q + 2 inclusive.
zero, the approximation returned lies between q and q + 2 inclusive. If | If the exact quotient q is larger than 32 bits, the maximum positive 32-bit
the exact quotient q is larger than 32 bits, the maximum positive 32-bit | unsigned integer is returned.
unsigned integer is returned. *----------------------------------------------------------------------------*/
-------------------------------------------------------------------------------
*/
static bits32 estimateDiv64To32( bits32 a0, bits32 a1, bits32 b ) static bits32 estimateDiv64To32( bits32 a0, bits32 a1, bits32 b )
{ {
bits32 b0, b1; bits32 b0, b1;
@ -506,17 +493,16 @@ static bits32 estimateDiv64To32( bits32 a0, bits32 a1, bits32 b )
} }
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | Returns an approximation to the square root of the 32-bit significand given
Returns an approximation to the square root of the 32-bit significand given | by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of | `aExp' (the least significant bit) is 1, the integer returned approximates
`aExp' (the least significant bit) is 1, the integer returned approximates | 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp' | is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either | case, the approximation returned lies strictly within +/-2 of the exact
case, the approximation returned lies strictly within +/-2 of the exact | value.
value. *----------------------------------------------------------------------------*/
-------------------------------------------------------------------------------
*/
static bits32 estimateSqrt32( int16 aExp, bits32 a ) static bits32 estimateSqrt32( int16 aExp, bits32 a )
{ {
static const bits16 sqrtOddAdjustments[] = { static const bits16 sqrtOddAdjustments[] = {
@ -546,12 +532,11 @@ static bits32 estimateSqrt32( int16 aExp, bits32 a )
} }
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | Returns the number of leading 0 bits before the most-significant 1 bit of
Returns the number of leading 0 bits before the most-significant 1 bit | `a'. If `a' is zero, 32 is returned.
of `a'. If `a' is zero, 32 is returned. *----------------------------------------------------------------------------*/
-------------------------------------------------------------------------------
*/
static int8 countLeadingZeros32( bits32 a ) static int8 countLeadingZeros32( bits32 a )
{ {
static const int8 countLeadingZerosHigh[] = { static const int8 countLeadingZerosHigh[] = {
@ -588,13 +573,12 @@ static int8 countLeadingZeros32( bits32 a )
} }
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is
Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is equal | equal to the 64-bit value formed by concatenating `b0' and `b1'. Otherwise,
to the 64-bit value formed by concatenating `b0' and `b1'. Otherwise, | returns 0.
returns 0. *----------------------------------------------------------------------------*/
-------------------------------------------------------------------------------
*/
INLINE flag eq64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 ) INLINE flag eq64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
{ {
@ -602,13 +586,12 @@ INLINE flag eq64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
} }
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less
Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less | than or equal to the 64-bit value formed by concatenating `b0' and `b1'.
than or equal to the 64-bit value formed by concatenating `b0' and `b1'. | Otherwise, returns 0.
Otherwise, returns 0. *----------------------------------------------------------------------------*/
-------------------------------------------------------------------------------
*/
INLINE flag le64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 ) INLINE flag le64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
{ {
@ -616,13 +599,12 @@ INLINE flag le64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
} }
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less
Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less | than the 64-bit value formed by concatenating `b0' and `b1'. Otherwise,
than the 64-bit value formed by concatenating `b0' and `b1'. Otherwise, | returns 0.
returns 0. *----------------------------------------------------------------------------*/
-------------------------------------------------------------------------------
*/
INLINE flag lt64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 ) INLINE flag lt64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
{ {
@ -630,13 +612,12 @@ INLINE flag lt64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
} }
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is not
Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is not | equal to the 64-bit value formed by concatenating `b0' and `b1'. Otherwise,
equal to the 64-bit value formed by concatenating `b0' and `b1'. Otherwise, | returns 0.
returns 0. *----------------------------------------------------------------------------*/
-------------------------------------------------------------------------------
*/
INLINE flag ne64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 ) INLINE flag ne64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
{ {

View file

@ -1,9 +1,8 @@
/* /*============================================================================
===============================================================================
This C source fragment is part of the SoftFloat IEC/IEEE Floating-point This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2. Arithmetic Package, Release 2b.
Written by John R. Hauser. This work was made possible in part by the Written by John R. Hauser. This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center International Computer Science Institute, located at Suite 600, 1947 Center
@ -12,39 +11,38 @@ National Science Foundation under grant MIP-9311980. The original version
of this code was written as part of a project to build a fixed-point vector of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley, processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek. More information overseen by Profs. Nelson Morgan and John Wawrzynek. More information
is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
arithmetic/softfloat.html'. arithmetic/SoftFloat.html'.
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
Derivative works are acceptable, even for commercial purposes, so long as Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they (1) the source code for the derivative work includes prominent notice that
include prominent notice akin to these three paragraphs for those parts of the work is derivative, and (2) the source code includes prominent notice with
this code that are retained. these four paragraphs for those parts of this code that are retained.
=============================================================================== =============================================================================*/
*/
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | Underflow tininess-detection mode, statically initialized to default value.
Underflow tininess-detection mode, statically initialized to default value. | (The declaration in `softfloat.h' must match the `int8' type here.)
(The declaration in `softfloat.h' must match the `int8' type here.) *----------------------------------------------------------------------------*/
-------------------------------------------------------------------------------
*/
int8 float_detect_tininess = float_tininess_after_rounding; int8 float_detect_tininess = float_tininess_after_rounding;
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | Raises the exceptions specified by `flags'. Floating-point traps can be
Raises the exceptions specified by `flags'. Floating-point traps can be | defined here if desired. It is currently not possible for such a trap
defined here if desired. It is currently not possible for such a trap to | to substitute a result value. If traps are not implemented, this routine
substitute a result value. If traps are not implemented, this routine | should be simply `float_exception_flags |= flags;'.
should be simply `float_exception_flags |= flags;'. *----------------------------------------------------------------------------*/
-------------------------------------------------------------------------------
*/
void float_raise( int8 flags ) void float_raise( int8 flags )
{ {
@ -52,31 +50,26 @@ void float_raise( int8 flags )
} }
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | Internal canonical NaN format.
Internal canonical NaN format. *----------------------------------------------------------------------------*/
-------------------------------------------------------------------------------
*/
typedef struct { typedef struct {
flag sign; flag sign;
bits32 high, low; bits32 high, low;
} commonNaNT; } commonNaNT;
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | The pattern for a default generated single-precision NaN.
The pattern for a default generated single-precision NaN. *----------------------------------------------------------------------------*/
-------------------------------------------------------------------------------
*/
enum { enum {
float32_default_nan = 0xFFFFFFFF float32_default_nan = 0xFFFFFFFF
}; };
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | Returns 1 if the single-precision floating-point value `a' is a NaN;
Returns 1 if the single-precision floating-point value `a' is a NaN; | otherwise returns 0.
otherwise returns 0. *----------------------------------------------------------------------------*/
-------------------------------------------------------------------------------
*/
flag float32_is_nan( float32 a ) flag float32_is_nan( float32 a )
{ {
@ -84,12 +77,11 @@ flag float32_is_nan( float32 a )
} }
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | Returns 1 if the single-precision floating-point value `a' is a signaling
Returns 1 if the single-precision floating-point value `a' is a signaling | NaN; otherwise returns 0.
NaN; otherwise returns 0. *----------------------------------------------------------------------------*/
-------------------------------------------------------------------------------
*/
flag float32_is_signaling_nan( float32 a ) flag float32_is_signaling_nan( float32 a )
{ {
@ -97,13 +89,42 @@ flag float32_is_signaling_nan( float32 a )
} }
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | Returns the result of converting the single-precision floating-point NaN
Takes two single-precision floating-point values `a' and `b', one of which | `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a | exception is raised.
signaling NaN, the invalid exception is raised. *----------------------------------------------------------------------------*/
-------------------------------------------------------------------------------
*/ static commonNaNT float32ToCommonNaN( float32 a )
{
commonNaNT z;
/* A signaling NaN input raises the invalid exception before conversion. */
if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
/* Bit 31 of `a' is carried over as the sign of the canonical NaN. */
z.sign = a>>31;
z.low = 0;
/* The left shift by 9 drops the top 9 bits of `a' and left-justifies the
   remaining bits in `high' (assuming `float32' is 32 bits wide, these are
   the low 23 bits -- TODO confirm against the `bits32' typedef). */
z.high = a<<9;
return z;
}
/*----------------------------------------------------------------------------
| Builds a single-precision NaN from the canonical NaN `a'.  The result is
| the OR of three fields: the sign of `a' placed in bit 31, the constant
| 0x7FC00000, and the `high' word of `a' shifted right by 9 bits.  The `low'
| word of `a' is not used.
*----------------------------------------------------------------------------*/
static float32 commonNaNToFloat32( commonNaNT a )
{
    bits32 signBit = ( (bits32) a.sign )<<31;
    bits32 payload = a.high>>9;

    return signBit | 0x7FC00000 | payload;
}
/*----------------------------------------------------------------------------
| Takes two single-precision floating-point values `a' and `b', one of which
| is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a
| signaling NaN, the invalid exception is raised.
*----------------------------------------------------------------------------*/
static float32 propagateFloat32NaN( float32 a, float32 b ) static float32 propagateFloat32NaN( float32 a, float32 b )
{ {
flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
@ -123,3 +144,99 @@ static float32 propagateFloat32NaN( float32 a, float32 b )
} }
} }
/*----------------------------------------------------------------------------
| The pattern for a default generated double-precision NaN.  The `high' and
| `low' values hold the most- and least-significant bits, respectively.
|
| These are unsigned macro constants rather than enumerators because the
| value 0xFFFFFFFF is not representable in an `int' when `int' is 32 bits
| wide, and ISO C (before C23) requires every enumeration constant to fit in
| an `int'.  The names and values are unchanged for all users.
*----------------------------------------------------------------------------*/
#define float64_default_nan_high 0xFFFFFFFFU
#define float64_default_nan_low  0xFFFFFFFFU
/*----------------------------------------------------------------------------
| Returns 1 if the double-precision floating-point value `a' is a NaN;
| otherwise returns 0.  The value is a NaN when the high word, with bit 31
| shifted out, is at least 0xFFE00000 (bits 20 through 30 of `high' all set)
| and at least one bit is set in `low' or in the low 20 bits of `high'.
*----------------------------------------------------------------------------*/
flag float64_is_nan( float64 a )
{
    bits32 highWithoutSign = (bits32) ( a.high<<1 );

    if ( highWithoutSign < 0xFFE00000 ) return 0;
    return ( a.low != 0 ) || ( ( a.high & 0x000FFFFF ) != 0 );
}
/*----------------------------------------------------------------------------
| Returns 1 if the double-precision floating-point value `a' is a signaling
| NaN; otherwise returns 0.  Bits 19 through 30 of `high' must equal 0xFFE
| (bits 20 through 30 set, bit 19 clear), and at least one bit must be set in
| `low' or in the low 19 bits of `high'.
*----------------------------------------------------------------------------*/
flag float64_is_signaling_nan( float64 a )
{
    bits32 bits19To30 = ( a.high>>19 ) & 0xFFF;
    flag hasPayload = ( a.low != 0 ) || ( ( a.high & 0x0007FFFF ) != 0 );

    return ( bits19To30 == 0xFFE ) && hasPayload;
}
/*----------------------------------------------------------------------------
| Converts the double-precision floating-point NaN `a' to the canonical NaN
| format.  The invalid exception is raised when `a' is a signaling NaN.  The
| sign is taken from bit 31 of `a.high'; the `high' and `low' words of the
| result are produced by `shortShift64Left' applied to `a.high' and `a.low'
| with a count of 12.
*----------------------------------------------------------------------------*/
static commonNaNT float64ToCommonNaN( float64 a )
{
    commonNaNT result;

    if ( float64_is_signaling_nan( a ) ) {
        float_raise( float_flag_invalid );
    }
    result.sign = a.high>>31;
    shortShift64Left( a.high, a.low, 12, &result.high, &result.low );
    return result;
}
/*----------------------------------------------------------------------------
| Converts the canonical NaN `a' to the double-precision floating-point
| format.  The `high' and `low' words of the result are produced by
| `shift64Right' applied to `a.high' and `a.low' with a count of 12; the
| sign of `a' (placed in bit 31) and the constant 0x7FF80000 are then ORed
| into the high word.
*----------------------------------------------------------------------------*/
static float64 commonNaNToFloat64( commonNaNT a )
{
    float64 result;
    bits32 signBit = ( (bits32) a.sign )<<31;

    shift64Right( a.high, a.low, 12, &result.high, &result.low );
    result.high |= signBit | 0x7FF80000;
    return result;
}
/*----------------------------------------------------------------------------
| Takes two double-precision floating-point values `a' and `b', one of which
| is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is
| a signaling NaN, the invalid exception is raised.  Bit 19 of the high word
| (0x00080000) is set in both operands before one of them is returned.  `a'
| is returned when it is a NaN, unless `a' is signaling and `b' is also a
| NaN; in every other case `b' is returned.
*----------------------------------------------------------------------------*/
static float64 propagateFloat64NaN( float64 a, float64 b )
{
    /* Classify both operands before their high words are modified below. */
    flag aIsNaN = float64_is_nan( a );
    flag aIsSignalingNaN = float64_is_signaling_nan( a );
    flag bIsNaN = float64_is_nan( b );
    flag bIsSignalingNaN = float64_is_signaling_nan( b );

    a.high |= 0x00080000;
    b.high |= 0x00080000;
    if ( aIsSignalingNaN | bIsSignalingNaN ) {
        float_raise( float_flag_invalid );
    }
    if ( ! aIsNaN ) return b;
    if ( aIsSignalingNaN & bIsNaN ) return b;
    return a;
}

File diff suppressed because it is too large Load diff

View file

@ -1,9 +1,8 @@
/* /*============================================================================
===============================================================================
This C header file is part of the SoftFloat IEC/IEEE Floating-point This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
Arithmetic Package, Release 2. Package, Release 2b.
Written by John R. Hauser. This work was made possible in part by the Written by John R. Hauser. This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center International Computer Science Institute, located at Suite 600, 1947 Center
@ -12,96 +11,87 @@ National Science Foundation under grant MIP-9311980. The original version
of this code was written as part of a project to build a fixed-point vector of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley, processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek. More information overseen by Profs. Nelson Morgan and John Wawrzynek. More information
is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
arithmetic/softfloat.html'. arithmetic/SoftFloat.html'.
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
Derivative works are acceptable, even for commercial purposes, so long as Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they (1) the source code for the derivative work includes prominent notice that
include prominent notice akin to these three paragraphs for those parts of the work is derivative, and (2) the source code includes prominent notice with
this code that are retained. these four paragraphs for those parts of this code that are retained.
=============================================================================== =============================================================================*/
*/
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | Software IEC/IEEE floating-point types.
Software IEC/IEEE floating-point types. *----------------------------------------------------------------------------*/
------------------------------------------------------------------------------- typedef bits32 float32;
*/ typedef struct {
typedef unsigned int float32; bits32 high, low;
} float64;
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | Software IEC/IEEE floating-point underflow tininess-detection mode.
Software IEC/IEEE floating-point underflow tininess-detection mode. *----------------------------------------------------------------------------*/
------------------------------------------------------------------------------- extern int8 float_detect_tininess;
*/
extern int float_detect_tininess;
enum { enum {
float_tininess_after_rounding = 0, float_tininess_after_rounding = 0,
float_tininess_before_rounding = 1 float_tininess_before_rounding = 1
}; };
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | Software IEC/IEEE floating-point rounding mode.
Software IEC/IEEE floating-point rounding mode. *----------------------------------------------------------------------------*/
------------------------------------------------------------------------------- extern int8 float_rounding_mode;
*/
extern int float_rounding_mode;
enum { enum {
float_round_nearest_even = 0, float_round_nearest_even = 0,
float_round_to_zero = 1, float_round_to_zero = 1,
float_round_up = 2, float_round_down = 2,
float_round_down = 3 float_round_up = 3
}; };
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | Software IEC/IEEE floating-point exception flags.
Software IEC/IEEE floating-point exception flags. *----------------------------------------------------------------------------*/
------------------------------------------------------------------------------- extern int8 float_exception_flags;
*/
extern int float_exception_flags;
enum { enum {
float_flag_inexact = 1, float_flag_inexact = 1,
float_flag_divbyzero = 2, float_flag_underflow = 2,
float_flag_underflow = 4, float_flag_overflow = 4,
float_flag_overflow = 8, float_flag_divbyzero = 8,
float_flag_invalid = 16 float_flag_invalid = 16
}; };
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | Routine to raise any or all of the software IEC/IEEE floating-point
Routine to raise any or all of the software IEC/IEEE floating-point | exception flags.
exception flags. *----------------------------------------------------------------------------*/
------------------------------------------------------------------------------- void float_raise( int8 );
*/
void float_raise( int );
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | Software IEC/IEEE integer-to-floating-point conversion routines.
Software IEC/IEEE integer-to-floating-point conversion routines. *----------------------------------------------------------------------------*/
------------------------------------------------------------------------------- float32 int32_to_float32( int32 );
*/ float64 int32_to_float64( int32 );
float32 int32_to_float32( int );
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | Software IEC/IEEE single-precision conversion routines.
Software IEC/IEEE single-precision conversion routines. *----------------------------------------------------------------------------*/
------------------------------------------------------------------------------- int32 float32_to_int32( float32 );
*/ int32 float32_to_int32_round_to_zero( float32 );
int float32_to_int32( float32 ); float64 float32_to_float64( float32 );
int float32_to_int32_round_to_zero( float32 );
/* /*----------------------------------------------------------------------------
------------------------------------------------------------------------------- | Software IEC/IEEE single-precision operations.
Software IEC/IEEE single-precision operations. *----------------------------------------------------------------------------*/
-------------------------------------------------------------------------------
*/
float32 float32_round_to_int( float32 ); float32 float32_round_to_int( float32 );
float32 float32_add( float32, float32 ); float32 float32_add( float32, float32 );
float32 float32_sub( float32, float32 ); float32 float32_sub( float32, float32 );
@ -115,6 +105,30 @@ flag float32_lt( float32, float32 );
flag float32_eq_signaling( float32, float32 ); flag float32_eq_signaling( float32, float32 );
flag float32_le_quiet( float32, float32 ); flag float32_le_quiet( float32, float32 );
flag float32_lt_quiet( float32, float32 ); flag float32_lt_quiet( float32, float32 );
flag float32_is_nan( float32 a );
flag float32_is_signaling_nan( float32 ); flag float32_is_signaling_nan( float32 );
/*----------------------------------------------------------------------------
| Software IEC/IEEE double-precision conversion routines.
*----------------------------------------------------------------------------*/
int32 float64_to_int32( float64 );
int32 float64_to_int32_round_to_zero( float64 );
float32 float64_to_float32( float64 );
/*----------------------------------------------------------------------------
| Software IEC/IEEE double-precision operations.
| `float64_is_nan' is declared alongside `float64_is_signaling_nan' so that
| the double-precision NaN predicates are exposed in the same way as the
| single-precision ones.
*----------------------------------------------------------------------------*/
float64 float64_round_to_int( float64 );
float64 float64_add( float64, float64 );
float64 float64_sub( float64, float64 );
float64 float64_mul( float64, float64 );
float64 float64_div( float64, float64 );
float64 float64_rem( float64, float64 );
float64 float64_sqrt( float64 );
flag float64_eq( float64, float64 );
flag float64_le( float64, float64 );
flag float64_lt( float64, float64 );
flag float64_eq_signaling( float64, float64 );
flag float64_le_quiet( float64, float64 );
flag float64_lt_quiet( float64, float64 );
flag float64_is_nan( float64 );
flag float64_is_signaling_nan( float64 );