Improve subnormal/normal rounding

This commit is contained in:
Dolu1990 2021-02-26 16:32:42 +01:00
parent de81da36eb
commit 81c193af1f
4 changed files with 184 additions and 82 deletions

View File

@ -1004,7 +1004,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
output.roundMode := input.roundMode output.roundMode := input.roundMode
output.scrap := norm.scrap output.scrap := norm.scrap
output.value := norm.output output.value := norm.output
output.NV := NV //TODO isn't propagated in FMA output.NV := NV
output.DZ := False output.DZ := False
decode.mulToAdd.valid := input.valid && input.add decode.mulToAdd.valid := input.valid && input.add
@ -1019,6 +1019,10 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
decode.mulToAdd.roundMode := input.roundMode decode.mulToAdd.roundMode := input.roundMode
if (p.withDouble) decode.mulToAdd.format := input.format if (p.withDouble) decode.mulToAdd.format := input.format
when(NV){
decode.mulToAdd.rs1.mantissa.msb := False
}
input.ready := (input.add ? decode.mulToAdd.ready | output.ready) || input.divSqrt input.ready := (input.add ? decode.mulToAdd.ready | output.ready) || input.divSqrt
} }
} }
@ -1558,7 +1562,23 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val ufThreshold = muxDouble[UInt](input.format)(exponentF64Subnormal-52+1)(exponentF32Subnormal-23+1) val ufThreshold = muxDouble[UInt](input.format)(exponentF64Subnormal-52+1)(exponentF32Subnormal-23+1)
val ofThreshold = muxDouble[UInt](input.format)(exponentF64Infinity-1)(exponentF32Infinity-1) val ofThreshold = muxDouble[UInt](input.format)(exponentF64Infinity-1)(exponentF32Infinity-1)
when(!math.special && math.exponent <= ufSubnormalThreshold && roundAdjusted.asUInt =/= 0){ //Do not catch exact 1.17549435E-38 underflow, but, who realy care ? //catch exact 1.17549435E-38 underflow, but, who realy care ?
// val borringCase = input.value.exponent === ufSubnormalThreshold && roundAdjusted.asUInt < U"11"
// when(!math.special && (math.exponent <= ufSubnormalThreshold || borringCase) && roundAdjusted.asUInt =/= 0){
// uf := True
// }
val threshold = input.roundMode.mux(
FpuRoundMode.RNE -> U"110",
FpuRoundMode.RTZ -> U"110",
FpuRoundMode.RDN -> (input.value.sign ? U"101" | U"111"),
FpuRoundMode.RUP -> (input.value.sign ? U"111" | U"101"),
FpuRoundMode.RMM -> U"110"
)
val borringRound = (input.value.mantissa(1 downto 0) ## input.scrap)
if(p.withDouble) when(input.format === FpuFormat.FLOAT) { borringRound := (input.value.mantissa(30 downto 29) ## input.value.mantissa(28 downto 0).orR)}
val borringCase = input.value.exponent === ufSubnormalThreshold && borringRound.asUInt < threshold
when(!math.special && (math.exponent <= ufSubnormalThreshold || borringCase) && roundAdjusted.asUInt =/= 0){
uf := True uf := True
} }
when(!math.special && math.exponent > ofThreshold){ when(!math.special && math.exponent > ofThreshold){
@ -1840,6 +1860,11 @@ cat all1.txt | grep "Errors found in"
testfloat -tininessafter -all2 > all2.txt testfloat -tininessafter -all2 > all2.txt
cat all2.txt | grep "Errors found in" cat all2.txt | grep "Errors found in"
testfloat -tininessafter -f32_mulAdd > fma.txt
testfloat -tininessafter -all2 -level 2 -checkall > all2.txt
all1 => all1 =>
Errors found in f32_to_ui64_rx_minMag: Errors found in f32_to_ui64_rx_minMag:
@ -1848,28 +1873,29 @@ Errors found in f64_to_ui64_rx_minMag:
Errors found in f64_to_i64_rx_minMag: Errors found in f64_to_i64_rx_minMag:
all2 => all2 =>
Errors found in f32_add, rounding near_even:
Errors found in f32_add, rounding minMag:
Errors found in f32_add, rounding min: Errors found in f32_mulAdd, rounding min:
Errors found in f32_add, rounding max: +00.7FFFFF +67.000001 -01.000000
Errors found in f32_sub, rounding near_even: => -01.000000 ...ux expected -01.000000 ....x
Errors found in f32_sub, rounding minMag: +67.000001 +00.7FFFFF -01.000000
Errors found in f32_sub, rounding min: => -01.000000 ...ux expected -01.000000 ....x
Errors found in f32_sub, rounding max: -00.7FFFFF -67.000001 -01.000000
Errors found in f32_mul, rounding near_even: => -01.000000 ...ux expected -01.000000 ....x
Errors found in f32_mul, rounding min: -67.000001 -00.7FFFFF -01.000000
Errors found in f32_mul, rounding max: => -01.000000 ...ux expected -01.000000 ....x
Errors found in f32_div, rounding near_even: Errors found in f32_mulAdd, rounding max:
Errors found in f32_div, rounding minMag: +00.7FFFFF -67.000001 +01.000000
Errors found in f32_div, rounding min: => +01.000000 ...ux expected +01.000000 ....x
Errors found in f32_div, rounding max: +67.000001 -00.7FFFFF +01.000000
Errors found in f64_mul, rounding near_even: => +01.000000 ...ux expected +01.000000 ....x
Errors found in f64_mul, rounding min: +66.7FFFFE -01.000001 +01.000000
Errors found in f64_mul, rounding max: => +01.000000 ...ux expected +01.000000 ....x
Errors found in f64_div, rounding near_even: -00.7FFFFF +67.000001 +01.000000
Errors found in f64_div, rounding minMag: => +01.000000 ...ux expected +01.000000 ....x
Errors found in f64_div, rounding min: -67.000001 +00.7FFFFF +01.000000
Errors found in f64_div, rounding max: => +01.000000 ...ux expected +01.000000 ....x
*/ */

View File

@ -43,6 +43,10 @@ void applyRounding(int rounding){
#define toF32(v) (*((float32_t*)&v)) #define toF32(v) (*((float32_t*)&v))
#define fromF32(x) (*((float*)&(x.v))) #define fromF32(x) (*((float*)&(x.v)))
#define toF64(v) (*((float64_t*)&v))
#define fromF64(x) (*((double*)&(x.v)))
JNIEXPORT jfloat API JNICALL Java_vexriscv_ip_fpu_FpuMath_addF32(JNIEnv * env, jobject obj, jfloat a, jfloat b, jint rounding){ JNIEXPORT jfloat API JNICALL Java_vexriscv_ip_fpu_FpuMath_addF32(JNIEnv * env, jobject obj, jfloat a, jfloat b, jint rounding){
applyRounding(rounding); applyRounding(rounding);
float32_t v = f32_add(toF32(a), toF32(b)); float32_t v = f32_add(toF32(a), toF32(b));
@ -53,4 +57,23 @@ JNIEXPORT jfloat API JNICALL Java_vexriscv_ip_fpu_FpuMath_mulF32(JNIEnv * env, j
applyRounding(rounding); applyRounding(rounding);
float32_t v = f32_mul(toF32(a), toF32(b)); float32_t v = f32_mul(toF32(a), toF32(b));
return fromF32(v); return fromF32(v);
}
// JNI binding for FpuMath.mulFlagF32: multiplies the two single-precision
// operands with softfloat's f32_mul and returns the softfloat exception flags
// raised by that one multiplication (the product itself is not returned).
//   a, b     : operands, reinterpreted bit-for-bit as float32_t by toF32
//   rounding : forwarded to applyRounding (presumably selects the softfloat
//              rounding mode -- its body is not visible here, confirm)
JNIEXPORT jint API JNICALL Java_vexriscv_ip_fpu_FpuMath_mulFlagF32(JNIEnv * env, jobject obj, jfloat a, jfloat b, jint rounding){
    applyRounding(rounding);
    // Clear the global accumulator so the returned value reflects only this call.
    softfloat_exceptionFlags = 0;
    // Only the side effect on softfloat_exceptionFlags matters; discard the
    // product explicitly instead of parking it in an unused local.
    (void) f32_mul(toF32(a), toF32(b));
    return softfloat_exceptionFlags;
}
// JNI binding for FpuMath.d2f: narrows the double `a` to single precision
// using softfloat's f64_to_f32 and hands the result back as a jfloat.
//   a        : source value, reinterpreted bit-for-bit as float64_t by toF64
//   rounding : forwarded to applyRounding before the conversion (presumably
//              selects the softfloat rounding mode -- confirm)
JNIEXPORT jfloat API JNICALL Java_vexriscv_ip_fpu_FpuMath_d2f(JNIEnv * env, jobject obj, jdouble a, jint rounding){
    applyRounding(rounding);
    // fromF32 takes the address of the struct's .v member, so the converted
    // value has to live in a named local.
    float32_t narrowed = f64_to_f32(toF64(a));
    return fromF32(narrowed);
}
// JNI binding for FpuMath.d2fFlag: performs the same double -> single
// conversion as d2f (softfloat f64_to_f32) but returns the softfloat exception
// flags raised by the conversion instead of the converted value.
//   a        : source value, reinterpreted bit-for-bit as float64_t by toF64
//   rounding : forwarded to applyRounding (presumably selects the softfloat
//              rounding mode -- confirm)
JNIEXPORT jint API JNICALL Java_vexriscv_ip_fpu_FpuMath_d2fFlag(JNIEnv * env, jobject obj, jdouble a, jint rounding){
applyRounding(rounding);
// Clear the global accumulator so the returned value reflects only this call.
softfloat_exceptionFlags = 0;
// The converted value is intentionally unused; the call is made for its
// effect on softfloat_exceptionFlags.
float32_t v = f64_to_f32(toF64(a));
return softfloat_exceptionFlags;
} }

View File

@ -5,6 +5,9 @@ import java.io.File;
public class FpuMath { public class FpuMath {
public native float addF32(float a, float b, int rounding); public native float addF32(float a, float b, int rounding);
public native float mulF32(float a, float b, int rounding); public native float mulF32(float a, float b, int rounding);
public native int mulFlagF32(float a, float b, int rounding);
public native float d2f(double a, int rounding);
public native int d2fFlag(double a, int rounding);
static{ static{
System.load(new File("src/test/cpp/fpu/math/fpu_math.so").getAbsolutePath()); System.load(new File("src/test/cpp/fpu/math/fpu_math.so").getAbsolutePath());

View File

@ -59,7 +59,7 @@ class FpuTest extends FunSuite{
val config = SimConfig val config = SimConfig
config.allOptimisation config.allOptimisation
config.withFstWave // config.withFstWave
config.compile(new FpuCore(portCount, p){ config.compile(new FpuCore(portCount, p){
for(i <- 0 until portCount) out(Bits(5 bits)).setName(s"flagAcc$i") := io.port(i).completion.flags.asBits for(i <- 0 until portCount) out(Bits(5 bits)).setName(s"flagAcc$i") := io.port(i).completion.flags.asBits
setDefinitionName("FpuCore"+ (if(p.withDouble) "Double" else "")) setDefinitionName("FpuCore"+ (if(p.withDouble) "Double" else ""))
@ -206,18 +206,18 @@ class FpuTest extends FunSuite{
def softAssert(cond : Boolean, msg : String) = if(!cond)println(msg) def softAssert(cond : Boolean, msg : String) = if(!cond)println(msg)
// flagMatch, Float-result overload. Each line below carries two versions of
// the same statement (earlier one first, later one second).
// Earlier version: when |value| equals 1.17549435E-38f, bit 1 of the reference
// flags is cleared before comparing (presumably the underflow flag -- confirm).
// Later version: `patch` is a comparison mask passed as the third argument;
// because the condition is and-ed with `false`, the mask is always 0x1f, i.e.
// all five flag bits are compared and the special case is disabled.
def flagMatch(ref : Int, value : Float, report : String): Unit ={ def flagMatch(ref : Int, value : Float, report : String): Unit ={
val patch = if(value.abs == 1.17549435E-38f) ref & ~2 else ref val patch = if(value.abs == 1.17549435E-38f && false) 0x1f & ~2 else 0x1f
flagMatch(patch, report) flagMatch(ref, report, patch)
} }
// flagMatch, Double-result overload. Each line below carries two versions of
// the same statement (earlier one first, later one second).
// Earlier version: clears bit 1 of the reference flags when |value| equals
// b2d(1 << 52); later version: builds a mask instead, which is always 0x1f
// because the condition is and-ed with `false`.
// NOTE(review): `1 << 52` is an Int shift, so the shift count is taken modulo
// 32 and the argument is 1 << 20, not bit 52. If b2d wraps
// Double.longBitsToDouble (confirm), `1L << 52` was probably intended to get
// the smallest normal double. Harmless while the check is disabled.
def flagMatch(ref : Int, value : Double, report : String): Unit ={ def flagMatch(ref : Int, value : Double, report : String): Unit ={
val patch = if(value.abs == b2d(1 << 52)) ref & ~2 else ref val patch = if(value.abs == b2d(1 << 52) && false) 0x1f & ~2 else 0x1f
flagMatch(patch, report) flagMatch(ref, report, patch)
} }
// flagMatch, base overload. Each line below carries two versions of the same
// statement (earlier one first, later one second).
// Waits until pendingMiaou reaches 0, asserts that the accumulated DUT flags
// agree with `ref`, then resets flagAccumulator to 0 for the next check.
// Earlier version compares the full values; later version adds `mask`
// (default 0x1f) and compares only the bits selected by it.
//   ref    : expected flag bits
//   report : text appended to the assertion message on mismatch
//   mask   : (later version only) bits of the flags that take part in the comparison
def flagMatch(ref : Int, report : String): Unit ={ def flagMatch(ref : Int, report : String, mask : Int = 0x1f): Unit ={
waitUntil(pendingMiaou == 0) waitUntil(pendingMiaou == 0)
assert(flagAccumulator == ref, s"Flag missmatch dut=$flagAccumulator ref=$ref $report") assert((flagAccumulator & mask) == (ref & mask), s"Flag missmatch dut=$flagAccumulator ref=$ref $report")
flagAccumulator = 0 flagAccumulator = 0
} }
def flagClear(): Unit ={ def flagClear(): Unit ={
@ -1257,26 +1257,68 @@ class FpuTest extends FunSuite{
var fxxTests = f32Tests var fxxTests = f32Tests
if(p.withDouble) fxxTests ++= f64Tests if(p.withDouble) fxxTests ++= f64Tests
//5071920 5225560
// for(v <- List(-1.17549435082e-38f, 1.17549435082e-38f);
// rounding <- FpuRoundMode.elements) {
// for (i <- 0 until 2048) {
// val b = d2b(v)// 0x0010000000000000l //d2b(1.17549435082e-38)
// val s = (b - (i.toLong << 21)).toLong
// val d = b2d(s)
//// val rounding = FpuRoundMode.RNE
// testCvtF64F32Raw(d, Clib.math.d2f(d, rounding.position), Clib.math.d2fFlag(d, rounding.position), rounding)
// }
// }
//
//
// testCvtF64F32Raw(-1.1754943508051483E-38, -1.17549435E-38f, 1, FpuRoundMode.RNE)
// testCvtF64F32Raw( 1.1754943157898258E-38, 1.17549435E-38f , 3, FpuRoundMode.RMM)
// testCvtF64F32Raw( 1.1754942807573643E-38, 1.17549435E-38f , 3, FpuRoundMode.RMM)
// testCvtF64F32Raw(-1.1754943508051483E-38, -1.17549435E-38f, 1, FpuRoundMode.RMM)
//-1.1754943508051483E-38 -1.17549435E-38 1 RNE @ 592770
// 1.1754943157898258E-38 1.17549435E-38 3 RMM @ 2697440
// 1.1754942807573643E-38 1.17549435E-38 3 RMM
// for(_ <- 0 until 1000000) testCvtF64F32() // 1 did not equal 3 Flag missmatch dut=1 ref=3 testCvtF64F32Raw 1.1754942807573643E-38 1.17549435E-38 RMM
// println("FCVT_D_S done")
// testBinaryOpF64(div, -2.2250738564511294E-308, 4.294967296003891E9, -5.180654E-318, 1, FpuRoundMode.RDN,"div") // ??? wtf // testBinaryOpF64(div, -2.2250738564511294E-308, 4.294967296003891E9, -5.180654E-318, 1, FpuRoundMode.RDN,"div") // ??? wtf
// testBinaryOp(add,b2f(0x7F800000),b2f(0x1FD << 23),b2f(0x7F800000),0, FpuRoundMode.RNE,"add") // testBinaryOp(add,b2f(0x7F800000),b2f(0x1FD << 23),b2f(0x7F800000),0, FpuRoundMode.RNE,"add")
for(_ <- 0 until 1000000) testDivF64()
println("f64 div done")
// testBinaryOp(mul,1.1753509E-38f, 1.0001221f ,1.17549435E-38f,1, FpuRoundMode.RNE,"mul")
for(_ <- 0 until 10000) testDiv() //
println("f32 div done") // for(i <- 0 until 10000000){
// val rounding = FpuRoundMode.elements.randomPick()
for(_ <- 0 until 10000) testAddF32() // val (a,b,c,f) = f32.mul(rounding).f32_f32_f32
for(_ <- 0 until 10000) testSubF32() // testBinaryOp(mul,a,b,c,f, rounding,"mul")
// }
println("Add done") //
// testBinaryOpF64(mul,2.781342323134002E-309, 7.999999999999999, 2.2250738585072014E-308, 3, FpuRoundMode.RNE,"mul")
//// for(i <- 0 until 10000000){
for(_ <- 0 until 10000) testSqrt() //// val rounding = FpuRoundMode.RNE
println("f32 sqrt done") //// val (a,b,c,f) = f64.mul(rounding).f64_f64_f64
//// testBinaryOpF64(mul,a,b,c,f, rounding,"mul")
//// }
// for(_ <- 0 until 100000000) testMulF64()
// println("f64 Mul done")
//
// for(_ <- 0 until 10000) testDivF64()
// println("f64 div done")
//
//
// for(_ <- 0 until 10000) testDiv()
// println("f32 div done")
//
// for(_ <- 0 until 10000) testAddF32()
// for(_ <- 0 until 10000) testSubF32()
//
// println("Add done")
//
//
// for(_ <- 0 until 10000) testSqrt()
// println("f32 sqrt done")
@ -1533,8 +1575,8 @@ class FpuTest extends FunSuite{
// DoCmd.doCmd(cmd) // DoCmd.doCmd(cmd)
// val math = new FpuMath // val math = new FpuMath
//} //}
//// cd /media/data/open/SaxonSoc/testFloatBuild/berkeley-softfloat-3/build/Linux-x86_64-GCC // cd /media/data/open/SaxonSoc/testFloatBuild/berkeley-softfloat-3/build/Linux-x86_64-GCC
//// make clean && SPECIALIZE_TYPE=RISCV make -j$(nproc) && cp softfloat.a /media/data/open/SaxonSoc/artyA7SmpUpdate/SaxonSoc/ext/VexRiscv/src/test/cpp/fpu/math // make clean && SPECIALIZE_TYPE=RISCV make -j$(nproc) && cp softfloat.a /media/data/open/SaxonSoc/artyA7SmpUpdate/SaxonSoc/ext/VexRiscv/src/test/cpp/fpu/math
//object FpuCompileSo extends App{ //object FpuCompileSo extends App{
// //
//// val b2f = lang.Float.intBitsToFloat(_) //// val b2f = lang.Float.intBitsToFloat(_)
@ -1548,29 +1590,52 @@ class FpuTest extends FunSuite{
//// miaou ffffffff 7fffffe0 7f //// miaou ffffffff 7fffffe0 7f
//// miaou 0 3ffffff0 70 = 0 //// miaou 0 3ffffff0 70 = 0
// //
// val b2f = lang.Float.intBitsToFloat(_)
// val b2d = lang.Double.longBitsToDouble(_)
// val f2b = lang.Float.floatToRawIntBits(_)
// val d2bOffset = BigInt("10000000000000000",16)
// def d2b(that : Double) = {
// val l = lang.Double.doubleToRawLongBits(that)
// var a = BigInt(l)
// if(l < 0) {
// a = d2bOffset + a
// }
// a
// }
// val builder =new StringBuilder()
// for(i <- 0 until 256){
//// builder ++= (Clib.math.mulF32(1.17548538251e-38f, b2f(f2b(1.0f)+i),0)).toString + "\n"
// val b = d2b(1.17549435082e-38)
// val s = (b-(i.toLong << 25)).toLong
// val d = b2d(s)
// builder ++= f"$b $s $d => "
// builder ++= f"${d2b(d)}%x " + (Clib.math.d2fFlag(d,0)).toString + " " + d + " => " + (Clib.math.d2f(d,FpuRoundMode.RMM.position)).toString + "\n"
// }
// //
// Thread.sleep(400)
// println(builder.toString)
// println(Clib.math.mulF32( 1.1753509E-38f, 1.0001221f, FpuRoundMode.RUP.position)) // println(Clib.math.mulF32( 1.1753509E-38f, 1.0001221f, FpuRoundMode.RUP.position))
// println(Clib.math.mulF32( 1.1754945E-38f, 0.9999998f, FpuRoundMode.RUP.position)) // println(Clib.math.mulF32( 1.1754945E-38f, 0.9999998f, FpuRoundMode.RUP.position))
//// testBinaryOp(mul, 1.1753509E-38f, 1.0001221f, 1.17549435E-38f ,1, FpuRoundMode.RUP,"mul") // testBinaryOp(mul, 1.1753509E-38f, 1.0001221f, 1.17549435E-38f ,1, FpuRoundMode.RUP,"mul")
//// testBinaryOp(mul, 1.1754945E-38f, 0.9999998f, 1.17549435E-38f, 3, FpuRoundMode.RUP, "mul") // testBinaryOp(mul, 1.1754945E-38f, 0.9999998f, 1.17549435E-38f, 3, FpuRoundMode.RUP, "mul")
//// miaou ffffffff 7fffffe0 7f // miaou ffffffff 7fffffe0 7f
//// miaou 0 3ffffff0 70 = 0 // miaou 0 3ffffff0 70 = 0
//// miaou ffffffff 7fffff7e 7f // miaou ffffffff 7fffff7e 7f
//// miaou 1 3fffffbf 3f = 1 // miaou 1 3fffffbf 3f = 1
//
//// println(Clib.math.mulF32( 1.1753509E-38f, 1.0001221f, FpuRoundMode.RUP.position)) // println(Clib.math.mulF32( 1.1753509E-38f, 1.0001221f, FpuRoundMode.RUP.position))
//// println(Clib.math.mulF32( 1.469368E-39f, 7.9999995f, FpuRoundMode.RUP.position)) // println(Clib.math.mulF32( 1.469368E-39f, 7.9999995f, FpuRoundMode.RUP.position))
//// println(Clib.math.mulF32( 1.40129846432e-45f, 7.9999995f, FpuRoundMode.RUP.position)) // println(Clib.math.mulF32( 1.40129846432e-45f, 7.9999995f, FpuRoundMode.RUP.position))
//// println(Clib.math.mulF32( 2.93873587706e-39f, 7.9999995f, FpuRoundMode.RUP.position)) // println(Clib.math.mulF32( 2.93873587706e-39f, 7.9999995f, FpuRoundMode.RUP.position))
//// println(Clib.math.mulF32( 1f, 7.9999995f, FpuRoundMode.RUP.position)) // println(Clib.math.mulF32( 1f, 7.9999995f, FpuRoundMode.RUP.position))
//
//
//// println(Clib.math.addF32(1.00000011921f, 4.0f, FpuRoundMode.RNE.position)) // println(Clib.math.addF32(1.00000011921f, 4.0f, FpuRoundMode.RNE.position))
//// println(Clib.math.addF32(1.00000011921f, 4.0f, FpuRoundMode.RTZ.position)) // println(Clib.math.addF32(1.00000011921f, 4.0f, FpuRoundMode.RTZ.position))
//// println(Clib.math.addF32(1.00000011921f, 4.0f, FpuRoundMode.RDN.position)) // println(Clib.math.addF32(1.00000011921f, 4.0f, FpuRoundMode.RDN.position))
//// println(Clib.math.addF32(1.00000011921f, 4.0f, FpuRoundMode.RUP.position)) // println(Clib.math.addF32(1.00000011921f, 4.0f, FpuRoundMode.RUP.position))
//} //}
//
class ProcessStream(cmd : String){ class ProcessStream(cmd : String){
import sys.process._ import sys.process._
@ -1590,19 +1655,4 @@ class ProcessStream(cmd : String){
buf.dequeue()() buf.dequeue()()
} }
} }
//
//object TestSoftFloat extends App{
// val p = new ProcessStream("testfloat_gen -forever f32_add")
// Thread.sleep(1000)
// println(p.next)
// println(p.next)
// println(p.next)
// println(p.next)
// println(p.next)
// Thread.sleep(1000)
// println(p.next)
// while(true) {
// Thread.sleep(10)
// println(p.next)
// }
//}