diff --git a/src/main/scala/vexriscv/ip/fpu/FpuCore.scala b/src/main/scala/vexriscv/ip/fpu/FpuCore.scala index 9acc624..a041484 100644 --- a/src/main/scala/vexriscv/ip/fpu/FpuCore.scala +++ b/src/main/scala/vexriscv/ip/fpu/FpuCore.scala @@ -624,7 +624,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ // val exp = math.exp + U(needShift) // val man = needShift ? math.mulC(p.internalMantissaSize + 1, p.internalMantissaSize bits) | math.mulC(p.internalMantissaSize, p.internalMantissaSize bits) - val mulRounded = (math.mulC >> p.internalMantissaSize) + math.mulC(p.internalMantissaSize-1).asUInt + val mulRounded = (math.mulC >> p.internalMantissaSize) val needShift = mulRounded.msb val exp = math.exp + U(needShift) val man = needShift ? mulRounded(1, p.internalMantissaSize bits) | mulRounded(0, p.internalMantissaSize bits) @@ -903,7 +903,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ def xySign = shifter.xySign val xSigned = xMantissa.twoComplement(xSign) //TODO Is that necessary ? - val overshot = (ySign && shifter.roundingScrap) val ySigned = ((ySign ## Mux(ySign, ~yMantissa, yMantissa)).asUInt + (ySign && !shifter.roundingScrap).asUInt).asSInt //rounding here val xyMantissa = U(xSigned +^ ySigned).trim(1 bits) } @@ -916,11 +915,9 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ val shiftOh = OHMasking.first(xyMantissa.asBools.reverse) val shift = OHToUInt(shiftOh) val mantissa = (xyMantissa |<< shift) -// val mantissa = ((shifter.roundingScrap.asUInt @@ xyMantissa.reversed) |>> shift).reversed >> 1 val exponent = xyExponent -^ shift + 1 - xySign clearWhen(input.rs1.isZero && input.rs2.isZero) - val forceZero = xyMantissa === 0 || exponent.msb || (input.rs1.isZero && input.rs2.isZero) - val forceOverflow = exponent === exponentOne + 128 + val forceZero = xyMantissa === 0 || (input.rs1.isZero && input.rs2.isZero) +// val forceOverflow = exponent === exponentOne + 128 //Handled by writeback rounding val forceInfinity = (input.rs1.isInfinity || input.rs2.isInfinity) val forceNan = input.rs1.isNan || input.rs2.isNan || (input.rs1.isInfinity && input.rs2.isInfinity && (input.rs1.sign ^ input.rs2.sign)) } @@ -949,13 +946,13 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ } } elsewhen(norm.forceInfinity) { output.value.setInfinity - } elsewhen(norm.forceOverflow) { + } /*elsewhen(norm.forceOverflow) { val doMax = input.roundMode.mux( - FpuRoundMode.RNE -> (True), + FpuRoundMode.RNE -> (False), FpuRoundMode.RTZ -> (True), FpuRoundMode.RDN -> (!output.value.sign), FpuRoundMode.RUP -> (output.value.sign), - FpuRoundMode.RMM -> (True) + FpuRoundMode.RMM -> (False) ) when(doMax){ output.value.exponent := exponentOne + 127 @@ -963,7 +960,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ } otherwise { output.value.setInfinity } - } + }*/ } @@ -992,10 +989,25 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ math.mantissa := adder(0, p.internalMantissaSize bits) val patched = CombInit(math) - when(!input.value.special && math.exponent === exponentOne + 128){ - patched.setInfinity + when(!math.special && math.exponent >= exponentOne + 128){ +// patched.setInfinity + val doMax = input.roundMode.mux( + FpuRoundMode.RNE -> (False), + FpuRoundMode.RTZ -> (True), + FpuRoundMode.RDN -> (!math.sign), + FpuRoundMode.RUP -> (math.sign), + FpuRoundMode.RMM -> (False) + ) + when(doMax){ + patched.exponent := exponentOne + 127 + patched.mantissa.setAll() + } otherwise { + patched.setInfinity + } } + + val output = input.swapPayload(RoundOutput()) output.source := input.source output.lockId := input.lockId diff --git a/src/test/scala/vexriscv/ip/fpu/FpuTest.scala b/src/test/scala/vexriscv/ip/fpu/FpuTest.scala index 7f47669..1fcd058 100644 --- a/src/test/scala/vexriscv/ip/fpu/FpuTest.scala +++ b/src/test/scala/vexriscv/ip/fpu/FpuTest.scala @@ -69,6 +69,7 @@ class FpuTest extends FunSuite{ val f32 = new { val add = new TestCase("f32", "add") + val mul = new TestCase("f32", "mul") } val cpus = for(id <- 0 until portCount) yield new { @@ -137,7 +138,7 @@ class FpuTest extends FunSuite{ storeRaw(rs){rsp => body(b2f(rsp.value.toLong.toInt))} } - def mul(rd : Int, rs1 : Int, rs2 : Int): Unit ={ + def mul(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E = FpuRoundMode.RNE): Unit ={ cmdQueue += {cmd => cmd.opcode #= cmd.opcode.spinalEnum.MUL cmd.rs1 #= rs1 @@ -145,6 +146,7 @@ class FpuTest extends FunSuite{ cmd.rs3.randomize() cmd.rd #= rd cmd.arg #= 0 + cmd.roundMode #= rounding } commitQueue += {cmd => cmd.write #= true @@ -388,6 +390,19 @@ class FpuTest extends FunSuite{ } } + + def testMulExact(a : Float, b : Float, ref : Float, flag : Int, rounding : FpuRoundMode.E): Unit ={ + val rs = new RegAllocator() + val rs1, rs2, rs3 = rs.allocate() + val rd = Random.nextInt(32) + load(rs1, a) + load(rs2, b) + mul(rd,rs1,rs2, rounding) + storeFloat(rd){v => + assert(f2b(v) == f2b(ref), f"## ${a} * $b = $v, $ref $rounding") + } + } + def testLoadStore(a : Float): Unit ={ val rd = Random.nextInt(32) load(rd, a) @@ -418,6 +433,7 @@ class FpuTest extends FunSuite{ } + def testFma(a : Float, b : Float, c : Float): Unit ={ val rs = new RegAllocator() val rs1, rs2, rs3 = rs.allocate() @@ -609,19 +625,34 @@ class FpuTest extends FunSuite{ +// for(_ <- 0 until 1000000){ +// val rounding = FpuRoundMode.RTZ +// val (a,b,c,f) = f32.mul(rounding).f32_2 +// if(a > 0 && b > 0 && !c.isInfinity) testMulExact(a,b,c,f, rounding) +// } + + // roundingModes.foreach(rounding => println(Clib.math.addF32(0.0f, 0.0f, rounding.position))) // roundingModes.foreach(rounding => println(Clib.math.addF32(1.0f,-1.0f, rounding.position))) - println() - println(Clib.math.addF32(8.0f, b2f(0xBf800000), 0)) - println(Clib.math.addF32(8.0f, b2f(0xBf800001), 0)) - println(Clib.math.addF32(8.0f, b2f(0xBf800002), 0)) - println(Clib.math.addF32(8.0f, b2f(0xBf800003), 0)) - println(Clib.math.addF32(8.0f, b2f(0xBf800004), 0)) - println(Clib.math.addF32(8.0f, b2f(0xBf800005), 0)) - println(Clib.math.addF32(8.0f, b2f(0xBf800006), 0)) - println(Clib.math.addF32(8.0f, b2f(0xBf800007), 0)) - println(Clib.math.addF32(8.0f, b2f(0xBf800008), 0)) + println("Mul done") + + for(i <- 0 until 20) println(Clib.math.addF32(b2f(0x7f000000), b2f(0x7f000000-10+i), 0)) +// simSuccess() + + foreachRounding(r => println(Clib.math.addF32(b2f(0x7f7fffff), b2f(0x7f7fffff),r.position))) + println("") + foreachRounding(r => println(Clib.math.addF32(2.5787021E38f, 3.4027196E38f,r.position))) + println("") +// println(Clib.math.addF32(8.0f, b2f(0xBf800000), 0)) +// println(Clib.math.addF32(8.0f, b2f(0xBf800001), 0)) +// println(Clib.math.addF32(8.0f, b2f(0xBf800002), 0)) +// println(Clib.math.addF32(8.0f, b2f(0xBf800003), 0)) +// println(Clib.math.addF32(8.0f, b2f(0xBf800004), 0)) +// println(Clib.math.addF32(8.0f, b2f(0xBf800005), 0)) +// println(Clib.math.addF32(8.0f, b2f(0xBf800006), 0)) +// println(Clib.math.addF32(8.0f, b2f(0xBf800007), 0)) +// println(Clib.math.addF32(8.0f, b2f(0xBf800008), 0)) testAdd(-5.3687091E8f, 16.249022f, FpuRoundMode.RNE) testAdd(-5.3687091E8f, 16.0f, FpuRoundMode.RNE) @@ -645,7 +676,13 @@ class FpuTest extends FunSuite{ for(_ <- 0 until 1000000){ val rounding = FpuRoundMode.elements.randomPick() val (a,b,c,f) = f32.add(rounding).f32_2 - if(/*a > 0 && b < 0 && */!c.isInfinity) testAddExact(a,b,c,f, rounding) +// if(a.isNaN) println("Nan") +// if(b.isNaN) println("Nan") +// if(a.isInfinity) println("Inf") +// if(b.isInfinity) println("Inf") +// if(a == 0f) println("Zero") +// if(b == 0f) println("Zero") + /*if(/*a > 0 && b < 0 && */!c.isInfinity) */testAddExact(a,b,c,f, rounding) } waitUntil(cmdQueue.isEmpty) @@ -924,10 +961,17 @@ object Clib { object FpuCompileSo extends App{ - println(Clib.math.addF32(1.00000011921f, 4.0f, FpuRoundMode.RNE.position)) - println(Clib.math.addF32(1.00000011921f, 4.0f, FpuRoundMode.RTZ.position)) - println(Clib.math.addF32(1.00000011921f, 4.0f, FpuRoundMode.RDN.position)) - println(Clib.math.addF32(1.00000011921f, 4.0f, FpuRoundMode.RUP.position)) + val b2f = lang.Float.intBitsToFloat(_) + for(e <- FpuRoundMode.elements) { + println(e) + for (i <- -2 until 50) println(i + " => " + Clib.math.addF32(b2f(0x7f000000), b2f(0x7f000000 + i), e.position)) + println("") + } + +// println(Clib.math.addF32(1.00000011921f, 4.0f, FpuRoundMode.RNE.position)) +// println(Clib.math.addF32(1.00000011921f, 4.0f, FpuRoundMode.RTZ.position)) +// println(Clib.math.addF32(1.00000011921f, 4.0f, FpuRoundMode.RDN.position)) +// println(Clib.math.addF32(1.00000011921f, 4.0f, FpuRoundMode.RUP.position)) } class ProcessStream(cmd : String){