mirror of
https://github.com/enjoy-digital/litex.git
synced 2025-01-04 09:52:26 -05:00
software/libbase: upgrade softfloat to version 2b + add support for more precision
This commit is contained in:
parent
22f7d1716e
commit
97b77945e5
5 changed files with 3268 additions and 1930 deletions
|
@ -1,9 +1,8 @@
|
||||||
|
|
||||||
/*
|
/*============================================================================
|
||||||
===============================================================================
|
|
||||||
|
|
||||||
This C header file is part of the SoftFloat IEC/IEEE Floating-point
|
This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
|
||||||
Arithmetic Package, Release 2.
|
Package, Release 2b.
|
||||||
|
|
||||||
Written by John R. Hauser. This work was made possible in part by the
|
Written by John R. Hauser. This work was made possible in part by the
|
||||||
International Computer Science Institute, located at Suite 600, 1947 Center
|
International Computer Science Institute, located at Suite 600, 1947 Center
|
||||||
|
@ -12,54 +11,48 @@ National Science Foundation under grant MIP-9311980. The original version
|
||||||
of this code was written as part of a project to build a fixed-point vector
|
of this code was written as part of a project to build a fixed-point vector
|
||||||
processor in collaboration with the University of California at Berkeley,
|
processor in collaboration with the University of California at Berkeley,
|
||||||
overseen by Profs. Nelson Morgan and John Wawrzynek. More information
|
overseen by Profs. Nelson Morgan and John Wawrzynek. More information
|
||||||
is available through the Web page `http://http.cs.berkeley.edu/~jhauser/
|
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
|
||||||
arithmetic/softfloat.html'.
|
arithmetic/SoftFloat.html'.
|
||||||
|
|
||||||
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
|
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
|
||||||
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
|
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
|
||||||
TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
|
RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
|
||||||
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
|
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
|
||||||
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
|
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
|
||||||
|
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
|
||||||
|
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
|
||||||
|
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
|
||||||
|
|
||||||
Derivative works are acceptable, even for commercial purposes, so long as
|
Derivative works are acceptable, even for commercial purposes, so long as
|
||||||
(1) they include prominent notice that the work is derivative, and (2) they
|
(1) the source code for the derivative work includes prominent notice that
|
||||||
include prominent notice akin to these three paragraphs for those parts of
|
the work is derivative, and (2) the source code includes prominent notice with
|
||||||
this code that are retained.
|
these four paragraphs for those parts of this code that are retained.
|
||||||
|
|
||||||
===============================================================================
|
=============================================================================*/
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| Include common integer types and flags.
|
||||||
Common integer types and flags.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| One of the macros `BIGENDIAN' or `LITTLEENDIAN' must be defined.
|
||||||
One of the macros `BIGENDIAN' or `LITTLEENDIAN' must be defined.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
#define BIGENDIAN
|
#define BIGENDIAN
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| The macro `BITS64' can be defined to indicate that 64-bit integer types are
|
||||||
The macro `BITS64' can be defined to indicate that 64-bit integer types are
|
| supported by the compiler.
|
||||||
supported by the compiler.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
//#define BITS64
|
//#define BITS64
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| Each of the following `typedef's defines the most convenient type that holds
|
||||||
Each of the following `typedef's defines the most convenient type that holds
|
| integers of at least as many bits as specified. For example, `uint8' should
|
||||||
integers of at least as many bits as specified. For example, `uint8' should
|
| be the most convenient type that can hold unsigned integers of as many as
|
||||||
be the most convenient type that can hold unsigned integers of as many as
|
| 8 bits. The `flag' type must be able to hold either a 0 or 1. For most
|
||||||
8 bits. The `flag' type must be able to hold either a 0 or 1. For most
|
| implementations of C, `flag', `uint8', and `int8' should all be `typedef'ed
|
||||||
implementations of C, `flag', `uint8', and `int8' should all be `typedef'ed
|
| to the same as `int'.
|
||||||
to the same as `int'.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
typedef int flag;
|
typedef int flag;
|
||||||
typedef int uint8;
|
typedef int uint8;
|
||||||
typedef int int8;
|
typedef int int8;
|
||||||
|
@ -68,18 +61,16 @@ typedef int int16;
|
||||||
typedef unsigned int uint32;
|
typedef unsigned int uint32;
|
||||||
typedef signed int int32;
|
typedef signed int int32;
|
||||||
#ifdef BITS64
|
#ifdef BITS64
|
||||||
typedef unsigned long long int bits64;
|
typedef unsigned long long int uint64;
|
||||||
typedef signed long long int sbits64;
|
typedef signed long long int int64;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| Each of the following `typedef's defines a type that holds integers
|
||||||
Each of the following `typedef's defines a type that holds integers
|
| of _exactly_ the number of bits specified. For instance, for most
|
||||||
of _exactly_ the number of bits specified. For instance, for most
|
| implementations of C, `bits16' and `sbits16' should be `typedef'ed to
|
||||||
implementations of C, `bits16' and `sbits16' should be `typedef'ed to
|
| `unsigned short int' and `signed short int' (or `short int'), respectively.
|
||||||
`unsigned short int' and `signed short int' (or `short int'), respectively.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
typedef unsigned char bits8;
|
typedef unsigned char bits8;
|
||||||
typedef signed char sbits8;
|
typedef signed char sbits8;
|
||||||
typedef unsigned short int bits16;
|
typedef unsigned short int bits16;
|
||||||
|
@ -87,38 +78,33 @@ typedef signed short int sbits16;
|
||||||
typedef unsigned int bits32;
|
typedef unsigned int bits32;
|
||||||
typedef signed int sbits32;
|
typedef signed int sbits32;
|
||||||
#ifdef BITS64
|
#ifdef BITS64
|
||||||
typedef unsigned long long int uint64;
|
typedef unsigned long long int bits64;
|
||||||
typedef signed long long int int64;
|
typedef signed long long int sbits64;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef BITS64
|
#ifdef BITS64
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| The `LIT64' macro takes as its argument a textual integer literal and
|
||||||
The `LIT64' macro takes as its argument a textual integer literal and if
|
| if necessary ``marks'' the literal as having a 64-bit integer type.
|
||||||
necessary ``marks'' the literal as having a 64-bit integer type. For
|
| For example, the GNU C Compiler (`gcc') requires that 64-bit literals be
|
||||||
example, the Gnu C Compiler (`gcc') requires that 64-bit literals be
|
| appended with the letters `LL' standing for `long long', which is `gcc's
|
||||||
appended with the letters `LL' standing for `long long', which is `gcc's
|
| name for the 64-bit integer type. Some compilers may allow `LIT64' to be
|
||||||
name for the 64-bit integer type. Some compilers may allow `LIT64' to be
|
| defined as the identity macro: `#define LIT64( a ) a'.
|
||||||
defined as the identity macro: `#define LIT64( a ) a'.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
#define LIT64( a ) a##LL
|
#define LIT64( a ) a##LL
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| The macro `INLINE' can be used before functions that should be inlined. If
|
||||||
The macro `INLINE' can be used before functions that should be inlined. If
|
| a compiler does not support explicit inlining, this macro should be defined
|
||||||
a compiler does not support explicit inlining, this macro should be defined
|
| to be `static'.
|
||||||
to be `static'.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
#define INLINE extern inline
|
#define INLINE extern inline
|
||||||
|
|
||||||
/*
|
|
||||||
-------------------------------------------------------------------------------
|
/*----------------------------------------------------------------------------
|
||||||
Symbolic Boolean literals.
|
| Symbolic Boolean literals.
|
||||||
-------------------------------------------------------------------------------
|
*----------------------------------------------------------------------------*/
|
||||||
*/
|
|
||||||
enum {
|
enum {
|
||||||
FALSE = 0,
|
FALSE = 0,
|
||||||
TRUE = 1
|
TRUE = 1
|
||||||
|
|
|
@ -1,9 +1,8 @@
|
||||||
|
|
||||||
/*
|
/*============================================================================
|
||||||
===============================================================================
|
|
||||||
|
|
||||||
This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
|
This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
|
||||||
Arithmetic Package, Release 2.
|
Arithmetic Package, Release 2b.
|
||||||
|
|
||||||
Written by John R. Hauser. This work was made possible in part by the
|
Written by John R. Hauser. This work was made possible in part by the
|
||||||
International Computer Science Institute, located at Suite 600, 1947 Center
|
International Computer Science Institute, located at Suite 600, 1947 Center
|
||||||
|
@ -12,33 +11,34 @@ National Science Foundation under grant MIP-9311980. The original version
|
||||||
of this code was written as part of a project to build a fixed-point vector
|
of this code was written as part of a project to build a fixed-point vector
|
||||||
processor in collaboration with the University of California at Berkeley,
|
processor in collaboration with the University of California at Berkeley,
|
||||||
overseen by Profs. Nelson Morgan and John Wawrzynek. More information
|
overseen by Profs. Nelson Morgan and John Wawrzynek. More information
|
||||||
is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
|
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
|
||||||
arithmetic/softfloat.html'.
|
arithmetic/SoftFloat.html'.
|
||||||
|
|
||||||
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
|
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
|
||||||
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
|
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
|
||||||
TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
|
RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
|
||||||
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
|
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
|
||||||
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
|
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
|
||||||
|
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
|
||||||
|
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
|
||||||
|
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
|
||||||
|
|
||||||
Derivative works are acceptable, even for commercial purposes, so long as
|
Derivative works are acceptable, even for commercial purposes, so long as
|
||||||
(1) they include prominent notice that the work is derivative, and (2) they
|
(1) the source code for the derivative work includes prominent notice that
|
||||||
include prominent notice akin to these three paragraphs for those parts of
|
the work is derivative, and (2) the source code includes prominent notice with
|
||||||
this code that are retained.
|
these four paragraphs for those parts of this code that are retained.
|
||||||
|
|
||||||
===============================================================================
|
=============================================================================*/
|
||||||
*/
|
|
||||||
|
/*----------------------------------------------------------------------------
|
||||||
|
| Shifts `a' right by the number of bits given in `count'. If any nonzero
|
||||||
|
| bits are shifted off, they are ``jammed'' into the least significant bit of
|
||||||
|
| the result by setting the least significant bit to 1. The value of `count'
|
||||||
|
| can be arbitrarily large; in particular, if `count' is greater than 32, the
|
||||||
|
| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
|
||||||
|
| The result is stored in the location pointed to by `zPtr'.
|
||||||
|
*----------------------------------------------------------------------------*/
|
||||||
|
|
||||||
/*
|
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
Shifts `a' right by the number of bits given in `count'. If any nonzero
|
|
||||||
bits are shifted off, they are ``jammed'' into the least significant bit of
|
|
||||||
the result by setting the least significant bit to 1. The value of `count'
|
|
||||||
can be arbitrarily large; in particular, if `count' is greater than 32, the
|
|
||||||
result will be either 0 or 1, depending on whether `a' is zero or nonzero.
|
|
||||||
The result is stored in the location pointed to by `zPtr'.
|
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
|
INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
|
||||||
{
|
{
|
||||||
bits32 z;
|
bits32 z;
|
||||||
|
@ -56,15 +56,14 @@ INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the
|
||||||
Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the
|
| number of bits given in `count'. Any bits shifted off are lost. The value
|
||||||
number of bits given in `count'. Any bits shifted off are lost. The value
|
| of `count' can be arbitrarily large; in particular, if `count' is greater
|
||||||
of `count' can be arbitrarily large; in particular, if `count' is greater
|
| than 64, the result will be 0. The result is broken into two 32-bit pieces
|
||||||
than 64, the result will be 0. The result is broken into two 32-bit pieces
|
| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
|
||||||
which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
INLINE void
|
INLINE void
|
||||||
shift64Right(
|
shift64Right(
|
||||||
bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr )
|
bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr )
|
||||||
|
@ -89,18 +88,17 @@ INLINE void
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the
|
||||||
Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the
|
| number of bits given in `count'. If any nonzero bits are shifted off, they
|
||||||
number of bits given in `count'. If any nonzero bits are shifted off, they
|
| are ``jammed'' into the least significant bit of the result by setting the
|
||||||
are ``jammed'' into the least significant bit of the result by setting the
|
| least significant bit to 1. The value of `count' can be arbitrarily large;
|
||||||
least significant bit to 1. The value of `count' can be arbitrarily large;
|
| in particular, if `count' is greater than 64, the result will be either 0
|
||||||
in particular, if `count' is greater than 64, the result will be either 0
|
| or 1, depending on whether the concatenation of `a0' and `a1' is zero or
|
||||||
or 1, depending on whether the concatenation of `a0' and `a1' is zero or
|
| nonzero. The result is broken into two 32-bit pieces which are stored at
|
||||||
nonzero. The result is broken into two 32-bit pieces which are stored at
|
| the locations pointed to by `z0Ptr' and `z1Ptr'.
|
||||||
the locations pointed to by `z0Ptr' and `z1Ptr'.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
INLINE void
|
INLINE void
|
||||||
shift64RightJamming(
|
shift64RightJamming(
|
||||||
bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr )
|
bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr )
|
||||||
|
@ -133,26 +131,25 @@ INLINE void
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' right
|
||||||
Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' right
|
| by 32 _plus_ the number of bits given in `count'. The shifted result is
|
||||||
by 32 _plus_ the number of bits given in `count'. The shifted result is
|
| at most 64 nonzero bits; these are broken into two 32-bit pieces which are
|
||||||
at most 64 nonzero bits; these are broken into two 32-bit pieces which are
|
| stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted
|
||||||
stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted
|
| off form a third 32-bit result as follows: The _last_ bit shifted off is
|
||||||
off form a third 32-bit result as follows: The _last_ bit shifted off is
|
| the most-significant bit of the extra result, and the other 31 bits of the
|
||||||
the most-significant bit of the extra result, and the other 31 bits of the
|
| extra result are all zero if and only if _all_but_the_last_ bits shifted off
|
||||||
extra result are all zero if and only if _all_but_the_last_ bits shifted off
|
| were all zero. This extra result is stored in the location pointed to by
|
||||||
were all zero. This extra result is stored in the location pointed to by
|
| `z2Ptr'. The value of `count' can be arbitrarily large.
|
||||||
`z2Ptr'. The value of `count' can be arbitrarily large.
|
| (This routine makes more sense if `a0', `a1', and `a2' are considered
|
||||||
(This routine makes more sense if `a0', `a1', and `a2' are considered
|
| to form a fixed-point value with binary point between `a1' and `a2'. This
|
||||||
to form a fixed-point value with binary point between `a1' and `a2'. This
|
| fixed-point value is shifted right by the number of bits given in `count',
|
||||||
fixed-point value is shifted right by the number of bits given in `count',
|
| and the integer part of the result is returned at the locations pointed to
|
||||||
and the integer part of the result is returned at the locations pointed to
|
| by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
|
||||||
by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
|
| corrupted as described above, and is returned at the location pointed to by
|
||||||
corrupted as described above, and is returned at the location pointed to by
|
| `z2Ptr'.)
|
||||||
`z2Ptr'.)
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
INLINE void
|
INLINE void
|
||||||
shift64ExtraRightJamming(
|
shift64ExtraRightJamming(
|
||||||
bits32 a0,
|
bits32 a0,
|
||||||
|
@ -204,14 +201,13 @@ INLINE void
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| Shifts the 64-bit value formed by concatenating `a0' and `a1' left by the
|
||||||
Shifts the 64-bit value formed by concatenating `a0' and `a1' left by the
|
| number of bits given in `count'. Any bits shifted off are lost. The value
|
||||||
number of bits given in `count'. Any bits shifted off are lost. The value
|
| of `count' must be less than 32. The result is broken into two 32-bit
|
||||||
of `count' must be less than 32. The result is broken into two 32-bit
|
| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
|
||||||
pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
INLINE void
|
INLINE void
|
||||||
shortShift64Left(
|
shortShift64Left(
|
||||||
bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr )
|
bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr )
|
||||||
|
@ -223,15 +219,14 @@ INLINE void
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' left
|
||||||
Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' left by
|
| by the number of bits given in `count'. Any bits shifted off are lost.
|
||||||
the number of bits given in `count'. Any bits shifted off are lost. The
|
| The value of `count' must be less than 32. The result is broken into three
|
||||||
value of `count' must be less than 32. The result is broken into three
|
| 32-bit pieces which are stored at the locations pointed to by `z0Ptr',
|
||||||
32-bit pieces which are stored at the locations pointed to by `z0Ptr',
|
| `z1Ptr', and `z2Ptr'.
|
||||||
`z1Ptr', and `z2Ptr'.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
INLINE void
|
INLINE void
|
||||||
shortShift96Left(
|
shortShift96Left(
|
||||||
bits32 a0,
|
bits32 a0,
|
||||||
|
@ -260,14 +255,13 @@ INLINE void
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| Adds the 64-bit value formed by concatenating `a0' and `a1' to the 64-bit
|
||||||
Adds the 64-bit value formed by concatenating `a0' and `a1' to the 64-bit
|
| value formed by concatenating `b0' and `b1'. Addition is modulo 2^64, so
|
||||||
value formed by concatenating `b0' and `b1'. Addition is modulo 2^64, so
|
| any carry out is lost. The result is broken into two 32-bit pieces which
|
||||||
any carry out is lost. The result is broken into two 32-bit pieces which
|
| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
|
||||||
are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
INLINE void
|
INLINE void
|
||||||
add64(
|
add64(
|
||||||
bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr )
|
bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr )
|
||||||
|
@ -280,15 +274,14 @@ INLINE void
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| Adds the 96-bit value formed by concatenating `a0', `a1', and `a2' to the
|
||||||
Adds the 96-bit value formed by concatenating `a0', `a1', and `a2' to the
|
| 96-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
|
||||||
96-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
|
| modulo 2^96, so any carry out is lost. The result is broken into three
|
||||||
modulo 2^96, so any carry out is lost. The result is broken into three
|
| 32-bit pieces which are stored at the locations pointed to by `z0Ptr',
|
||||||
32-bit pieces which are stored at the locations pointed to by `z0Ptr',
|
| `z1Ptr', and `z2Ptr'.
|
||||||
`z1Ptr', and `z2Ptr'.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
INLINE void
|
INLINE void
|
||||||
add96(
|
add96(
|
||||||
bits32 a0,
|
bits32 a0,
|
||||||
|
@ -319,15 +312,14 @@ INLINE void
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| Subtracts the 64-bit value formed by concatenating `b0' and `b1' from the
|
||||||
Subtracts the 64-bit value formed by concatenating `b0' and `b1' from the
|
| 64-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
|
||||||
64-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
|
| 2^64, so any borrow out (carry out) is lost. The result is broken into two
|
||||||
2^64, so any borrow out (carry out) is lost. The result is broken into two
|
| 32-bit pieces which are stored at the locations pointed to by `z0Ptr' and
|
||||||
32-bit pieces which are stored at the locations pointed to by `z0Ptr' and
|
| `z1Ptr'.
|
||||||
`z1Ptr'.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
INLINE void
|
INLINE void
|
||||||
sub64(
|
sub64(
|
||||||
bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr )
|
bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr )
|
||||||
|
@ -338,15 +330,14 @@ INLINE void
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| Subtracts the 96-bit value formed by concatenating `b0', `b1', and `b2' from
|
||||||
Subtracts the 96-bit value formed by concatenating `b0', `b1', and `b2' from
|
| the 96-bit value formed by concatenating `a0', `a1', and `a2'. Subtraction
|
||||||
the 96-bit value formed by concatenating `a0', `a1', and `a2'. Subtraction
|
| is modulo 2^96, so any borrow out (carry out) is lost. The result is broken
|
||||||
is modulo 2^96, so any borrow out (carry out) is lost. The result is broken
|
| into three 32-bit pieces which are stored at the locations pointed to by
|
||||||
into three 32-bit pieces which are stored at the locations pointed to by
|
| `z0Ptr', `z1Ptr', and `z2Ptr'.
|
||||||
`z0Ptr', `z1Ptr', and `z2Ptr'.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
INLINE void
|
INLINE void
|
||||||
sub96(
|
sub96(
|
||||||
bits32 a0,
|
bits32 a0,
|
||||||
|
@ -377,13 +368,12 @@ INLINE void
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| Multiplies `a' by `b' to obtain a 64-bit product. The product is broken
|
||||||
Multiplies `a' by `b' to obtain a 64-bit product. The product is broken
|
| into two 32-bit pieces which are stored at the locations pointed to by
|
||||||
into two 32-bit pieces which are stored at the locations pointed to by
|
| `z0Ptr' and `z1Ptr'.
|
||||||
`z0Ptr' and `z1Ptr'.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
INLINE void mul32To64( bits32 a, bits32 b, bits32 *z0Ptr, bits32 *z1Ptr )
|
INLINE void mul32To64( bits32 a, bits32 b, bits32 *z0Ptr, bits32 *z1Ptr )
|
||||||
{
|
{
|
||||||
bits16 aHigh, aLow, bHigh, bLow;
|
bits16 aHigh, aLow, bHigh, bLow;
|
||||||
|
@ -407,14 +397,13 @@ INLINE void mul32To64( bits32 a, bits32 b, bits32 *z0Ptr, bits32 *z1Ptr )
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| Multiplies the 64-bit value formed by concatenating `a0' and `a1' by `b'
|
||||||
Multiplies the 64-bit value formed by concatenating `a0' and `a1' by `b' to
|
| to obtain a 96-bit product. The product is broken into three 32-bit pieces
|
||||||
obtain a 96-bit product. The product is broken into three 32-bit pieces
|
| which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
|
||||||
which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
|
| `z2Ptr'.
|
||||||
`z2Ptr'.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
INLINE void
|
INLINE void
|
||||||
mul64By32To96(
|
mul64By32To96(
|
||||||
bits32 a0,
|
bits32 a0,
|
||||||
|
@ -436,14 +425,13 @@ INLINE void
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| Multiplies the 64-bit value formed by concatenating `a0' and `a1' to the
|
||||||
Multiplies the 64-bit value formed by concatenating `a0' and `a1' to the
|
| 64-bit value formed by concatenating `b0' and `b1' to obtain a 128-bit
|
||||||
64-bit value formed by concatenating `b0' and `b1' to obtain a 128-bit
|
| product. The product is broken into four 32-bit pieces which are stored at
|
||||||
product. The product is broken into four 32-bit pieces which are stored at
|
| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
|
||||||
the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
INLINE void
|
INLINE void
|
||||||
mul64To128(
|
mul64To128(
|
||||||
bits32 a0,
|
bits32 a0,
|
||||||
|
@ -474,16 +462,15 @@ INLINE void
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| Returns an approximation to the 32-bit integer quotient obtained by dividing
|
||||||
Returns an approximation to the 32-bit integer quotient obtained by dividing
|
| `b' into the 64-bit value formed by concatenating `a0' and `a1'. The
|
||||||
`b' into the 64-bit value formed by concatenating `a0' and `a1'. The divisor
|
| divisor `b' must be at least 2^31. If q is the exact quotient truncated
|
||||||
`b' must be at least 2^31. If q is the exact quotient truncated toward
|
| toward zero, the approximation returned lies between q and q + 2 inclusive.
|
||||||
zero, the approximation returned lies between q and q + 2 inclusive. If
|
| If the exact quotient q is larger than 32 bits, the maximum positive 32-bit
|
||||||
the exact quotient q is larger than 32 bits, the maximum positive 32-bit
|
| unsigned integer is returned.
|
||||||
unsigned integer is returned.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
static bits32 estimateDiv64To32( bits32 a0, bits32 a1, bits32 b )
|
static bits32 estimateDiv64To32( bits32 a0, bits32 a1, bits32 b )
|
||||||
{
|
{
|
||||||
bits32 b0, b1;
|
bits32 b0, b1;
|
||||||
|
@ -506,17 +493,16 @@ static bits32 estimateDiv64To32( bits32 a0, bits32 a1, bits32 b )
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| Returns an approximation to the square root of the 32-bit significand given
|
||||||
Returns an approximation to the square root of the 32-bit significand given
|
| by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
|
||||||
by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
|
| `aExp' (the least significant bit) is 1, the integer returned approximates
|
||||||
`aExp' (the least significant bit) is 1, the integer returned approximates
|
| 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
|
||||||
2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
|
| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
|
||||||
is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
|
| case, the approximation returned lies strictly within +/-2 of the exact
|
||||||
case, the approximation returned lies strictly within +/-2 of the exact
|
| value.
|
||||||
value.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
static bits32 estimateSqrt32( int16 aExp, bits32 a )
|
static bits32 estimateSqrt32( int16 aExp, bits32 a )
|
||||||
{
|
{
|
||||||
static const bits16 sqrtOddAdjustments[] = {
|
static const bits16 sqrtOddAdjustments[] = {
|
||||||
|
@ -546,12 +532,11 @@ static bits32 estimateSqrt32( int16 aExp, bits32 a )
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| Returns the number of leading 0 bits before the most-significant 1 bit of
|
||||||
Returns the number of leading 0 bits before the most-significant 1 bit
|
| `a'. If `a' is zero, 32 is returned.
|
||||||
of `a'. If `a' is zero, 32 is returned.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
static int8 countLeadingZeros32( bits32 a )
|
static int8 countLeadingZeros32( bits32 a )
|
||||||
{
|
{
|
||||||
static const int8 countLeadingZerosHigh[] = {
|
static const int8 countLeadingZerosHigh[] = {
|
||||||
|
@ -588,13 +573,12 @@ static int8 countLeadingZeros32( bits32 a )
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is
|
||||||
Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is equal
|
| equal to the 64-bit value formed by concatenating `b0' and `b1'. Otherwise,
|
||||||
to the 64-bit value formed by concatenating `b0' and `b1'. Otherwise,
|
| returns 0.
|
||||||
returns 0.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
INLINE flag eq64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
|
INLINE flag eq64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
|
||||||
{
|
{
|
||||||
|
|
||||||
|
@ -602,13 +586,12 @@ INLINE flag eq64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less
|
||||||
Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less
|
| than or equal to the 64-bit value formed by concatenating `b0' and `b1'.
|
||||||
than or equal to the 64-bit value formed by concatenating `b0' and `b1'.
|
| Otherwise, returns 0.
|
||||||
Otherwise, returns 0.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
INLINE flag le64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
|
INLINE flag le64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
|
||||||
{
|
{
|
||||||
|
|
||||||
|
@ -616,13 +599,12 @@ INLINE flag le64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less
|
||||||
Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less
|
| than the 64-bit value formed by concatenating `b0' and `b1'. Otherwise,
|
||||||
than the 64-bit value formed by concatenating `b0' and `b1'. Otherwise,
|
| returns 0.
|
||||||
returns 0.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
INLINE flag lt64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
|
INLINE flag lt64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
|
||||||
{
|
{
|
||||||
|
|
||||||
|
@ -630,13 +612,12 @@ INLINE flag lt64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is not
|
||||||
Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is not
|
| equal to the 64-bit value formed by concatenating `b0' and `b1'. Otherwise,
|
||||||
equal to the 64-bit value formed by concatenating `b0' and `b1'. Otherwise,
|
| returns 0.
|
||||||
returns 0.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
INLINE flag ne64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
|
INLINE flag ne64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
|
||||||
{
|
{
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,8 @@
|
||||||
|
|
||||||
/*
|
/*============================================================================
|
||||||
===============================================================================
|
|
||||||
|
|
||||||
This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
|
This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
|
||||||
Arithmetic Package, Release 2.
|
Arithmetic Package, Release 2b.
|
||||||
|
|
||||||
Written by John R. Hauser. This work was made possible in part by the
|
Written by John R. Hauser. This work was made possible in part by the
|
||||||
International Computer Science Institute, located at Suite 600, 1947 Center
|
International Computer Science Institute, located at Suite 600, 1947 Center
|
||||||
|
@ -12,39 +11,38 @@ National Science Foundation under grant MIP-9311980. The original version
|
||||||
of this code was written as part of a project to build a fixed-point vector
|
of this code was written as part of a project to build a fixed-point vector
|
||||||
processor in collaboration with the University of California at Berkeley,
|
processor in collaboration with the University of California at Berkeley,
|
||||||
overseen by Profs. Nelson Morgan and John Wawrzynek. More information
|
overseen by Profs. Nelson Morgan and John Wawrzynek. More information
|
||||||
is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
|
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
|
||||||
arithmetic/softfloat.html'.
|
arithmetic/SoftFloat.html'.
|
||||||
|
|
||||||
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
|
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
|
||||||
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
|
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
|
||||||
TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
|
RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
|
||||||
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
|
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
|
||||||
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
|
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
|
||||||
|
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
|
||||||
|
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
|
||||||
|
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
|
||||||
|
|
||||||
Derivative works are acceptable, even for commercial purposes, so long as
|
Derivative works are acceptable, even for commercial purposes, so long as
|
||||||
(1) they include prominent notice that the work is derivative, and (2) they
|
(1) the source code for the derivative work includes prominent notice that
|
||||||
include prominent notice akin to these three paragraphs for those parts of
|
the work is derivative, and (2) the source code includes prominent notice with
|
||||||
this code that are retained.
|
these four paragraphs for those parts of this code that are retained.
|
||||||
|
|
||||||
===============================================================================
|
=============================================================================*/
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| Underflow tininess-detection mode, statically initialized to default value.
|
||||||
Underflow tininess-detection mode, statically initialized to default value.
|
| (The declaration in `softfloat.h' must match the `int8' type here.)
|
||||||
(The declaration in `softfloat.h' must match the `int8' type here.)
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
int8 float_detect_tininess = float_tininess_after_rounding;
|
int8 float_detect_tininess = float_tininess_after_rounding;
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| Raises the exceptions specified by `flags'. Floating-point traps can be
|
||||||
Raises the exceptions specified by `flags'. Floating-point traps can be
|
| defined here if desired. It is currently not possible for such a trap
|
||||||
defined here if desired. It is currently not possible for such a trap to
|
| to substitute a result value. If traps are not implemented, this routine
|
||||||
substitute a result value. If traps are not implemented, this routine
|
| should be simply `float_exception_flags |= flags;'.
|
||||||
should be simply `float_exception_flags |= flags;'.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
void float_raise( int8 flags )
|
void float_raise( int8 flags )
|
||||||
{
|
{
|
||||||
|
|
||||||
|
@ -52,31 +50,26 @@ void float_raise( int8 flags )
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| Internal canonical NaN format.
|
||||||
Internal canonical NaN format.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
flag sign;
|
flag sign;
|
||||||
bits32 high, low;
|
bits32 high, low;
|
||||||
} commonNaNT;
|
} commonNaNT;
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| The pattern for a default generated single-precision NaN.
|
||||||
The pattern for a default generated single-precision NaN.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
enum {
|
enum {
|
||||||
float32_default_nan = 0xFFFFFFFF
|
float32_default_nan = 0xFFFFFFFF
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| Returns 1 if the single-precision floating-point value `a' is a NaN;
|
||||||
Returns 1 if the single-precision floating-point value `a' is a NaN;
|
| otherwise returns 0.
|
||||||
otherwise returns 0.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
flag float32_is_nan( float32 a )
|
flag float32_is_nan( float32 a )
|
||||||
{
|
{
|
||||||
|
|
||||||
|
@ -84,12 +77,11 @@ flag float32_is_nan( float32 a )
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| Returns 1 if the single-precision floating-point value `a' is a signaling
|
||||||
Returns 1 if the single-precision floating-point value `a' is a signaling
|
| NaN; otherwise returns 0.
|
||||||
NaN; otherwise returns 0.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
flag float32_is_signaling_nan( float32 a )
|
flag float32_is_signaling_nan( float32 a )
|
||||||
{
|
{
|
||||||
|
|
||||||
|
@ -97,13 +89,42 @@ flag float32_is_signaling_nan( float32 a )
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| Returns the result of converting the single-precision floating-point NaN
|
||||||
Takes two single-precision floating-point values `a' and `b', one of which
|
| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
|
||||||
is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a
|
| exception is raised.
|
||||||
signaling NaN, the invalid exception is raised.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
*/
|
static commonNaNT float32ToCommonNaN( float32 a )
|
||||||
|
{
|
||||||
|
commonNaNT z;
|
||||||
|
|
||||||
|
if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
|
||||||
|
z.sign = a>>31;
|
||||||
|
z.low = 0;
|
||||||
|
z.high = a<<9;
|
||||||
|
return z;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/*----------------------------------------------------------------------------
|
||||||
|
| Returns the result of converting the canonical NaN `a' to the single-
|
||||||
|
| precision floating-point format.
|
||||||
|
*----------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
static float32 commonNaNToFloat32( commonNaNT a )
|
||||||
|
{
|
||||||
|
|
||||||
|
return ( ( (bits32) a.sign )<<31 ) | 0x7FC00000 | ( a.high>>9 );
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/*----------------------------------------------------------------------------
|
||||||
|
| Takes two single-precision floating-point values `a' and `b', one of which
|
||||||
|
| is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a
|
||||||
|
| signaling NaN, the invalid exception is raised.
|
||||||
|
*----------------------------------------------------------------------------*/
|
||||||
|
|
||||||
static float32 propagateFloat32NaN( float32 a, float32 b )
|
static float32 propagateFloat32NaN( float32 a, float32 b )
|
||||||
{
|
{
|
||||||
flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
|
flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
|
||||||
|
@ -123,3 +144,99 @@ static float32 propagateFloat32NaN( float32 a, float32 b )
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*----------------------------------------------------------------------------
|
||||||
|
| The pattern for a default generated double-precision NaN. The `high' and
|
||||||
|
| `low' values hold the most- and least-significant bits, respectively.
|
||||||
|
*----------------------------------------------------------------------------*/
|
||||||
|
enum {
|
||||||
|
float64_default_nan_high = 0xFFFFFFFF,
|
||||||
|
float64_default_nan_low = 0xFFFFFFFF
|
||||||
|
};
|
||||||
|
|
||||||
|
/*----------------------------------------------------------------------------
|
||||||
|
| Returns 1 if the double-precision floating-point value `a' is a NaN;
|
||||||
|
| otherwise returns 0.
|
||||||
|
*----------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
flag float64_is_nan( float64 a )
|
||||||
|
{
|
||||||
|
|
||||||
|
return
|
||||||
|
( 0xFFE00000 <= (bits32) ( a.high<<1 ) )
|
||||||
|
&& ( a.low || ( a.high & 0x000FFFFF ) );
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/*----------------------------------------------------------------------------
|
||||||
|
| Returns 1 if the double-precision floating-point value `a' is a signaling
|
||||||
|
| NaN; otherwise returns 0.
|
||||||
|
*----------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
flag float64_is_signaling_nan( float64 a )
|
||||||
|
{
|
||||||
|
|
||||||
|
return
|
||||||
|
( ( ( a.high>>19 ) & 0xFFF ) == 0xFFE )
|
||||||
|
&& ( a.low || ( a.high & 0x0007FFFF ) );
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/*----------------------------------------------------------------------------
|
||||||
|
| Returns the result of converting the double-precision floating-point NaN
|
||||||
|
| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
|
||||||
|
| exception is raised.
|
||||||
|
*----------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
static commonNaNT float64ToCommonNaN( float64 a )
|
||||||
|
{
|
||||||
|
commonNaNT z;
|
||||||
|
|
||||||
|
if ( float64_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
|
||||||
|
z.sign = a.high>>31;
|
||||||
|
shortShift64Left( a.high, a.low, 12, &z.high, &z.low );
|
||||||
|
return z;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/*----------------------------------------------------------------------------
|
||||||
|
| Returns the result of converting the canonical NaN `a' to the double-
|
||||||
|
| precision floating-point format.
|
||||||
|
*----------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
static float64 commonNaNToFloat64( commonNaNT a )
|
||||||
|
{
|
||||||
|
float64 z;
|
||||||
|
|
||||||
|
shift64Right( a.high, a.low, 12, &z.high, &z.low );
|
||||||
|
z.high |= ( ( (bits32) a.sign )<<31 ) | 0x7FF80000;
|
||||||
|
return z;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/*----------------------------------------------------------------------------
|
||||||
|
| Takes two double-precision floating-point values `a' and `b', one of which
|
||||||
|
| is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a
|
||||||
|
| signaling NaN, the invalid exception is raised.
|
||||||
|
*----------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
static float64 propagateFloat64NaN( float64 a, float64 b )
|
||||||
|
{
|
||||||
|
flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
|
||||||
|
|
||||||
|
aIsNaN = float64_is_nan( a );
|
||||||
|
aIsSignalingNaN = float64_is_signaling_nan( a );
|
||||||
|
bIsNaN = float64_is_nan( b );
|
||||||
|
bIsSignalingNaN = float64_is_signaling_nan( b );
|
||||||
|
a.high |= 0x00080000;
|
||||||
|
b.high |= 0x00080000;
|
||||||
|
if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
|
||||||
|
if ( aIsNaN ) {
|
||||||
|
return ( aIsSignalingNaN & bIsNaN ) ? b : a;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return b;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,9 +1,8 @@
|
||||||
|
|
||||||
/*
|
/*============================================================================
|
||||||
===============================================================================
|
|
||||||
|
|
||||||
This C header file is part of the SoftFloat IEC/IEEE Floating-point
|
This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
|
||||||
Arithmetic Package, Release 2.
|
Package, Release 2b.
|
||||||
|
|
||||||
Written by John R. Hauser. This work was made possible in part by the
|
Written by John R. Hauser. This work was made possible in part by the
|
||||||
International Computer Science Institute, located at Suite 600, 1947 Center
|
International Computer Science Institute, located at Suite 600, 1947 Center
|
||||||
|
@ -12,96 +11,87 @@ National Science Foundation under grant MIP-9311980. The original version
|
||||||
of this code was written as part of a project to build a fixed-point vector
|
of this code was written as part of a project to build a fixed-point vector
|
||||||
processor in collaboration with the University of California at Berkeley,
|
processor in collaboration with the University of California at Berkeley,
|
||||||
overseen by Profs. Nelson Morgan and John Wawrzynek. More information
|
overseen by Profs. Nelson Morgan and John Wawrzynek. More information
|
||||||
is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
|
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
|
||||||
arithmetic/softfloat.html'.
|
arithmetic/SoftFloat.html'.
|
||||||
|
|
||||||
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
|
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
|
||||||
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
|
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
|
||||||
TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
|
RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
|
||||||
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
|
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
|
||||||
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
|
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
|
||||||
|
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
|
||||||
|
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
|
||||||
|
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
|
||||||
|
|
||||||
Derivative works are acceptable, even for commercial purposes, so long as
|
Derivative works are acceptable, even for commercial purposes, so long as
|
||||||
(1) they include prominent notice that the work is derivative, and (2) they
|
(1) the source code for the derivative work includes prominent notice that
|
||||||
include prominent notice akin to these three paragraphs for those parts of
|
the work is derivative, and (2) the source code includes prominent notice with
|
||||||
this code that are retained.
|
these four paragraphs for those parts of this code that are retained.
|
||||||
|
|
||||||
===============================================================================
|
=============================================================================*/
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| Software IEC/IEEE floating-point types.
|
||||||
Software IEC/IEEE floating-point types.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
typedef bits32 float32;
|
||||||
*/
|
typedef struct {
|
||||||
typedef unsigned int float32;
|
bits32 high, low;
|
||||||
|
} float64;
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| Software IEC/IEEE floating-point underflow tininess-detection mode.
|
||||||
Software IEC/IEEE floating-point underflow tininess-detection mode.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
extern int8 float_detect_tininess;
|
||||||
*/
|
|
||||||
extern int float_detect_tininess;
|
|
||||||
enum {
|
enum {
|
||||||
float_tininess_after_rounding = 0,
|
float_tininess_after_rounding = 0,
|
||||||
float_tininess_before_rounding = 1
|
float_tininess_before_rounding = 1
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| Software IEC/IEEE floating-point rounding mode.
|
||||||
Software IEC/IEEE floating-point rounding mode.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
extern int8 float_rounding_mode;
|
||||||
*/
|
|
||||||
extern int float_rounding_mode;
|
|
||||||
enum {
|
enum {
|
||||||
float_round_nearest_even = 0,
|
float_round_nearest_even = 0,
|
||||||
float_round_to_zero = 1,
|
float_round_to_zero = 1,
|
||||||
float_round_up = 2,
|
float_round_down = 2,
|
||||||
float_round_down = 3
|
float_round_up = 3
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| Software IEC/IEEE floating-point exception flags.
|
||||||
Software IEC/IEEE floating-point exception flags.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
extern int8 float_exception_flags;
|
||||||
*/
|
|
||||||
extern int float_exception_flags;
|
|
||||||
enum {
|
enum {
|
||||||
float_flag_inexact = 1,
|
float_flag_inexact = 1,
|
||||||
float_flag_divbyzero = 2,
|
float_flag_underflow = 2,
|
||||||
float_flag_underflow = 4,
|
float_flag_overflow = 4,
|
||||||
float_flag_overflow = 8,
|
float_flag_divbyzero = 8,
|
||||||
float_flag_invalid = 16
|
float_flag_invalid = 16
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| Routine to raise any or all of the software IEC/IEEE floating-point
|
||||||
Routine to raise any or all of the software IEC/IEEE floating-point
|
| exception flags.
|
||||||
exception flags.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
void float_raise( int8 );
|
||||||
*/
|
|
||||||
void float_raise( int );
|
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| Software IEC/IEEE integer-to-floating-point conversion routines.
|
||||||
Software IEC/IEEE integer-to-floating-point conversion routines.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
float32 int32_to_float32( int32 );
|
||||||
*/
|
float64 int32_to_float64( int32 );
|
||||||
float32 int32_to_float32( int );
|
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| Software IEC/IEEE single-precision conversion routines.
|
||||||
Software IEC/IEEE single-precision conversion routines.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
int32 float32_to_int32( float32 );
|
||||||
*/
|
int32 float32_to_int32_round_to_zero( float32 );
|
||||||
int float32_to_int32( float32 );
|
float64 float32_to_float64( float32 );
|
||||||
int float32_to_int32_round_to_zero( float32 );
|
|
||||||
|
|
||||||
/*
|
/*----------------------------------------------------------------------------
|
||||||
-------------------------------------------------------------------------------
|
| Software IEC/IEEE single-precision operations.
|
||||||
Software IEC/IEEE single-precision operations.
|
*----------------------------------------------------------------------------*/
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
float32 float32_round_to_int( float32 );
|
float32 float32_round_to_int( float32 );
|
||||||
float32 float32_add( float32, float32 );
|
float32 float32_add( float32, float32 );
|
||||||
float32 float32_sub( float32, float32 );
|
float32 float32_sub( float32, float32 );
|
||||||
|
@ -115,6 +105,30 @@ flag float32_lt( float32, float32 );
|
||||||
flag float32_eq_signaling( float32, float32 );
|
flag float32_eq_signaling( float32, float32 );
|
||||||
flag float32_le_quiet( float32, float32 );
|
flag float32_le_quiet( float32, float32 );
|
||||||
flag float32_lt_quiet( float32, float32 );
|
flag float32_lt_quiet( float32, float32 );
|
||||||
flag float32_is_nan( float32 a );
|
|
||||||
flag float32_is_signaling_nan( float32 );
|
flag float32_is_signaling_nan( float32 );
|
||||||
|
|
||||||
|
/*----------------------------------------------------------------------------
|
||||||
|
| Software IEC/IEEE double-precision conversion routines.
|
||||||
|
*----------------------------------------------------------------------------*/
|
||||||
|
int32 float64_to_int32( float64 );
|
||||||
|
int32 float64_to_int32_round_to_zero( float64 );
|
||||||
|
float32 float64_to_float32( float64 );
|
||||||
|
|
||||||
|
/*----------------------------------------------------------------------------
|
||||||
|
| Software IEC/IEEE double-precision operations.
|
||||||
|
*----------------------------------------------------------------------------*/
|
||||||
|
float64 float64_round_to_int( float64 );
|
||||||
|
float64 float64_add( float64, float64 );
|
||||||
|
float64 float64_sub( float64, float64 );
|
||||||
|
float64 float64_mul( float64, float64 );
|
||||||
|
float64 float64_div( float64, float64 );
|
||||||
|
float64 float64_rem( float64, float64 );
|
||||||
|
float64 float64_sqrt( float64 );
|
||||||
|
flag float64_eq( float64, float64 );
|
||||||
|
flag float64_le( float64, float64 );
|
||||||
|
flag float64_lt( float64, float64 );
|
||||||
|
flag float64_eq_signaling( float64, float64 );
|
||||||
|
flag float64_le_quiet( float64, float64 );
|
||||||
|
flag float64_lt_quiet( float64, float64 );
|
||||||
|
flag float64_is_signaling_nan( float64 );
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue