diff --git a/src/main/scala/vexriscv/ip/fpu/FpuCore.scala b/src/main/scala/vexriscv/ip/fpu/FpuCore.scala index a041484..8c231aa 100644 --- a/src/main/scala/vexriscv/ip/fpu/FpuCore.scala +++ b/src/main/scala/vexriscv/ip/fpu/FpuCore.scala @@ -624,22 +624,25 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ // val exp = math.exp + U(needShift) // val man = needShift ? math.mulC(p.internalMantissaSize + 1, p.internalMantissaSize bits) | math.mulC(p.internalMantissaSize, p.internalMantissaSize bits) - val mulRounded = (math.mulC >> p.internalMantissaSize) - val needShift = mulRounded.msb + val (mulHigh, mulLow) = math.mulC.splitAt(p.internalMantissaSize-1) + val scrap = mulLow =/= 0 + val needShift = mulHigh.msb val exp = math.exp + U(needShift) - val man = needShift ? mulRounded(1, p.internalMantissaSize bits) | mulRounded(0, p.internalMantissaSize bits) - + val man = needShift ? mulHigh(1, p.internalMantissaSize+1 bits) | mulHigh(0, p.internalMantissaSize+1 bits) + scrap setWhen(needShift && mulHigh(0)) val forceZero = input.rs1.isZero || input.rs2.isZero val forceUnderflow = exp <= exponentOne + exponentOne - 127 - 23 // 0x6A //TODO - val forceOverflow = exp > exponentOne + exponentOne + 127 || input.rs1.isInfinity || input.rs2.isInfinity + val forceOverflow = /*exp > exponentOne + exponentOne + 127 || */input.rs1.isInfinity || input.rs2.isInfinity val forceNan = input.rs1.isNan || input.rs2.isNan || ((input.rs1.isInfinity || input.rs2.isInfinity) && (input.rs1.isZero || input.rs2.isZero)) val output = FpuFloat(p.internalExponentSize, p.internalMantissaSize) output.sign := input.rs1.sign ^ input.rs2.sign output.exponent := (exp - exponentOne).resized - output.mantissa := man + output.mantissa := man.asUInt >> 1 output.setNormal + val round = man(0) ## (scrap) + when(forceNan) { output.setNanQuiet } elsewhen(forceOverflow) { @@ -664,7 +667,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ output.lockId := input.lockId output.rd := input.rd output.roundMode := input.roundMode - output.round := 0 //TODO + output.round := norm.round.asUInt output.value := norm.output decode.mulToAdd.valid := input.valid && input.add diff --git a/src/test/cpp/fpu/math/fpu_math.c b/src/test/cpp/fpu/math/fpu_math.c index 3d869fe..ee0b3c0 100644 --- a/src/test/cpp/fpu/math/fpu_math.c +++ b/src/test/cpp/fpu/math/fpu_math.c @@ -47,4 +47,10 @@ JNIEXPORT jfloat API JNICALL Java_vexriscv_ip_fpu_FpuMath_addF32(JNIEnv * env, j applyRounding(rounding); float32_t v = f32_add(toF32(a), toF32(b)); return fromF32(v); +} + +JNIEXPORT jfloat API JNICALL Java_vexriscv_ip_fpu_FpuMath_mulF32(JNIEnv * env, jobject obj, jfloat a, jfloat b, jint rounding){ + applyRounding(rounding); + float32_t v = f32_mul(toF32(a), toF32(b)); + return fromF32(v); } \ No newline at end of file diff --git a/src/test/java/vexriscv/ip/fpu/FpuMath.java b/src/test/java/vexriscv/ip/fpu/FpuMath.java index 136186e..88da007 100644 --- a/src/test/java/vexriscv/ip/fpu/FpuMath.java +++ b/src/test/java/vexriscv/ip/fpu/FpuMath.java @@ -4,6 +4,7 @@ import java.io.File; public class FpuMath { public native float addF32(float a, float b, int rounding); + public native float mulF32(float a, float b, int rounding); static{ System.load(new File("src/test/cpp/fpu/math/fpu_math.so").getAbsolutePath()); diff --git a/src/test/scala/vexriscv/ip/fpu/FpuTest.scala b/src/test/scala/vexriscv/ip/fpu/FpuTest.scala index 1fcd058..7d5d3dc 100644 --- a/src/test/scala/vexriscv/ip/fpu/FpuTest.scala +++ b/src/test/scala/vexriscv/ip/fpu/FpuTest.scala @@ -624,12 +624,22 @@ class FpuTest extends FunSuite{ } + for(i <- 0 until 64){ + val rounding = FpuRoundMode.RMM + val a = 24f + val b = b2f(0x3f800000+i) + val c = Clib.math.mulF32(a, b, rounding.position) + val f = 0 + testMulExact(a,b,c,f, rounding) + } -// for(_ <- 0 until 1000000){ -// val rounding = FpuRoundMode.RTZ -// val (a,b,c,f) = f32.mul(rounding).f32_2 -// if(a > 0 && b > 0 && !c.isInfinity) testMulExact(a,b,c,f, rounding) -// } +// simSuccess() + + for(_ <- 0 until 1000000){ + val rounding = FpuRoundMode.elements.randomPick() + val (a,b,c,f) = f32.mul(rounding).f32_2 + if(!(c.abs < 1e-35f && c.abs > 0f)) testMulExact(a,b,c,f, rounding) + } // roundingModes.foreach(rounding => println(Clib.math.addF32(0.0f, 0.0f, rounding.position)))