FPU add now supports special float values and better rounding

2021-01-22 14:55:37 +01:00 · 2021-01-22 14:55:37 +01:00 · 4bd637cf88
parent ccd13b7e9e
commit 4bd637cf88
5 changed files with 194 additions and 40 deletions
--- a/src/main/scala/vexriscv/ip/fpu/FpuCore.scala
+++ b/src/main/scala/vexriscv/ip/fpu/FpuCore.scala
@ -44,7 +44,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{

  case class LoadInput() extends Bundle{
    val source = Source()
-    val rs1 = p.internalFloating()
    val rd = p.rfAddress()
    val lockId = lockIdType()
  }
@ -175,7 +174,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
    val useRs1, useRs2, useRs3, useRd = False
    switch(s0.opcode){
      is(p.Opcode.LOAD)    {  useRd := True }
-      is(p.Opcode.STORE)   { useRs2 := True }
+      is(p.Opcode.STORE)   { useRs1 := True }
      is(p.Opcode.ADD)     { useRd  := True; useRs1 := True; useRs2 := True }
      is(p.Opcode.MUL)     { useRd  := True; useRs1 := True; useRs2 := True }
      is(p.Opcode.DIV)     { useRd  := True; useRs1 := True; useRs2 := True }
@ -288,13 +287,39 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
    val filtred = commitFork.load.map(port => port.takeWhen(port.load))
    def feed = filtred(input.source)
    val hazard = !feed.valid
+
+    val f32Mantissa = feed.value(0, 23 bits).asUInt
+    val f32Exponent = feed.value(23, 8 bits).asUInt
+    val f32Sign     = feed.value(31)
+
+    val expZero = f32Exponent === 0
+    val expOne =  f32Exponent === 255
+    val manZero = f32Mantissa === 0
+
+    val isZero      =  expZero &&  manZero
+    val isSubnormal =  expZero && !manZero
+    val isNormal    = !expOne  && !expZero
+    val isInfinity  =  expOne  &&  manZero
+    val isNan       =  expOne  && !manZero
+    val isQuiet     = f32Mantissa.msb
+
+    val recoded = p.internalFloating()
+    recoded.mantissa := f32Mantissa
+    recoded.exponent := f32Exponent
+    recoded.sign     := f32Sign
+    recoded.setNormal
+    when(isZero){recoded.setZero}
+    when(isSubnormal){recoded.setSubnormal}
+    when(isInfinity){recoded.setInfinity}
+    when(isNan){recoded.setNan}
+
    val output = input.haltWhen(hazard).swapPayload(WriteInput())
    filtred.foreach(_.ready := False)
    feed.ready := input.valid && output.ready
    output.source := input.source
    output.lockId := input.lockId
    output.rd := input.rd
-    output.value.assignFromBits(feed.value)
+    output.value := recoded
  }

  val shortPip = new Area{
@ -303,7 +328,25 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
    val rfOutput = Stream(WriteInput())

    val result = p.storeLoadType().assignDontCare()
-    val storeResult = input.rs2.asBits
+
+    val recoded = CombInit(input.rs1)
+    when(recoded.special){
+      switch(input.rs1.exponent(1 downto 0)){
+        is(FpuFloat.ZERO){
+          recoded.mantissa.clearAll()
+          recoded.exponent.clearAll()
+        }
+        is(FpuFloat.INFINITY){
+          recoded.mantissa.clearAll()
+          recoded.exponent.setAll()
+        }
+        is(FpuFloat.NAN){
+          recoded.exponent.setAll()
+        }
+      }
+    }
+
+    val recodedResult =  recoded.asBits.resize(32 bits)

    val f2iShift = input.rs1.exponent - U(exponentOne)
    val f2iShifted = (U"1" @@ input.rs1.mantissa) << (f2iShift.resize(5 bits))
@ -324,6 +367,33 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
      3 -> (!rs1AbsSmaller && !rs1Equal)
    )

+    val rawToFpu = new Area{ // Recodes the raw 32-bit input.value (IEEE-754 single-precision layout) into the internal float format
+      val f32Mantissa = input.value(0, 23 bits).asUInt // bits 22..0
+      val f32Exponent = input.value(23, 8 bits).asUInt // bits 30..23
+      val f32Sign     = input.value(31)
+
+      val expZero = f32Exponent === 0
+      val expOne =  f32Exponent === 255 // "One" here means every exponent bit is set
+      val manZero = f32Mantissa === 0
+
+      val isZero      =  expZero &&  manZero
+      val isSubnormal =  expZero && !manZero
+      val isNormal    = !expOne  && !expZero // not read inside this area
+      val isInfinity  =  expOne  &&  manZero
+      val isNan       =  expOne  && !manZero
+      val isQuiet     = f32Mantissa.msb // not read inside this area
+
+      val recoded = p.internalFloating()
+      recoded.mantissa := f32Mantissa
+      recoded.exponent := f32Exponent
+      recoded.sign     := f32Sign
+      recoded.setNormal // default: special flag cleared, fields passed through unchanged
+      when(isZero){recoded.setZero} // each setter below raises `special` and overwrites exponent(1 downto 0) with the category code
+      when(isSubnormal){recoded.setSubnormal}
+      when(isInfinity){recoded.setInfinity}
+      when(isNan){recoded.setNan}
+    }
+
    val minMaxResult = (rs1Smaller ^ input.arg(0)) ? input.rs1 | input.rs2
    val cmpResult = B(rs1Smaller && !input.arg(1) || rs1Equal && !input.arg(0))
    val sgnjResult = (input.rs1.sign && input.arg(1)) ^ input.rs2.sign ^ input.arg(0)
@ -342,10 +412,10 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{


    switch(input.opcode){
-      is(FpuOpcode.STORE)   { result := storeResult }
+      is(FpuOpcode.STORE)   { result := recodedResult }
+      is(FpuOpcode.FMV_X_W) { result := recodedResult } //TODO
      is(FpuOpcode.F2I)     { result := f2iResult }
      is(FpuOpcode.CMP)     { result := cmpResult.resized } //TODO
-      is(FpuOpcode.FMV_X_W) { result := input.rs1.asBits } //TODO
      is(FpuOpcode.FCLASS)  { result := fclassResult.resized }
    }

@ -361,6 +431,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
        rfOutput.value.sign := i2fSign
        rfOutput.value.exponent := i2fLog2 +^ exponentOne
        rfOutput.value.mantissa := U(i2fShifted).resized
+        rfOutput.value.special := False //TODO
      }
      is(FpuOpcode.MIN_MAX){
        rfOutput.value := minMaxResult
@ -369,9 +440,10 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
        rfOutput.value.sign     := sgnjResult
        rfOutput.value.exponent := input.rs1.exponent
        rfOutput.value.mantissa := input.rs1.mantissa
+        rfOutput.value.special := False //TODO
      }
      is(FpuOpcode.FMV_W_X){
-        rfOutput.value.assignFromBits(input.value) //TODO
+        rfOutput.value := rawToFpu.recoded
      }
    }

@ -403,6 +475,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
      output.sign := input.rs1.sign ^ input.rs2.sign
      output.exponent := exp.resized
      output.mantissa := man
+      output.special := False //TODO
    }

    val notMul = new Area{
@ -423,6 +496,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
    decode.mulToAdd.rs1.mantissa := norm.output.mantissa
    decode.mulToAdd.rs1.exponent := norm.output.exponent
    decode.mulToAdd.rs1.sign := norm.output.sign
+    decode.mulToAdd.rs1.special := False //TODO
    decode.mulToAdd.rs2 := input.rs3
    decode.mulToAdd.rd := input.rd
    decode.mulToAdd.lockId := input.lockId
@ -595,20 +669,21 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
    val input = decode.add.stage()

    val shifter = new Area {
-      val exp21 = input.rs2.exponent - input.rs1.exponent
-      val rs1ExponentBigger = exp21.msb
+      val exp21 = input.rs2.exponent -^ input.rs1.exponent
+      val rs1ExponentBigger = exp21.msb || input.rs2.isZeroOrSubnormal
      val rs1ExponentEqual = input.rs1.exponent === input.rs2.exponent
      val rs1MantissaBigger = input.rs1.mantissa > input.rs2.mantissa
-      val absRs1Bigger = rs1ExponentBigger|| rs1ExponentEqual && rs1MantissaBigger
+      val absRs1Bigger = ((rs1ExponentBigger || rs1ExponentEqual && rs1MantissaBigger) && !input.rs1.isZeroOrSubnormal || input.rs1.isInfinity) && !input.rs2.isInfinity
      val shiftBy = rs1ExponentBigger ? (0-exp21) | exp21
+      val passThrough = shiftBy >= p.internalMantissaSize || (input.rs1.isZeroOrSubnormal) || (input.rs2.isZeroOrSubnormal)

      //Note that rs1ExponentBigger can be replaced by absRs1Bigger bellow to avoid xsigned two complement in math block at expense of combinatorial path
      val xySign = absRs1Bigger ? input.rs1.sign | input.rs2.sign
      val xSign = xySign ^ (rs1ExponentBigger ? input.rs1.sign | input.rs2.sign)
      val ySign = xySign ^ (rs1ExponentBigger ? input.rs2.sign | input.rs1.sign)
-      val xMantissa = U"1" @@ (rs1ExponentBigger ? input.rs1.mantissa | input.rs2.mantissa)
-      val yMantissaUnshifted = U"1" @@ (rs1ExponentBigger ? input.rs2.mantissa | input.rs1.mantissa)
-      val yMantissa = yMantissaUnshifted >> shiftBy
+      val xMantissa = U"1" @@ (rs1ExponentBigger ? input.rs1.mantissa | input.rs2.mantissa) @@ U"0"
+      val yMantissaUnshifted = U"1" @@ (rs1ExponentBigger ? input.rs2.mantissa | input.rs1.mantissa) @@ U"0"
+      val yMantissa = yMantissaUnshifted >> (passThrough.asUInt @@ shiftBy.resize(log2Up(p.internalMantissaSize)))
      val xyExponent = rs1ExponentBigger ? input.rs1.exponent | input.rs2.exponent
    }

@ -621,8 +696,9 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
      def xySign = shifter.xySign

      val xSigned = xMantissa.twoComplement(xSign)
-      val ySigned = yMantissa.twoComplement(ySign)
-      val xyMantissa = U(xSigned +^ ySigned).trim(1 bits)
+//      val ySigned = (yMantissa +^ (yMantissa.lsb && !ySign).asUInt).twoComplement(ySign)
+      val ySigned = ((ySign ## Mux(ySign, ~yMantissa, yMantissa)).asUInt +^ (ySign || yMantissa.lsb).asUInt).asSInt
+      val xyMantissa = U(xSigned + ySigned).trim(1 bits)
    }

    val norm = new Area{
@ -632,16 +708,13 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{

      val shiftOh = OHMasking.first(xyMantissa.asBools.reverse)
      val shift = OHToUInt(shiftOh)
-      val mantissa = (xyMantissa |<< shift) >> 1
-      val exponent = xyExponent - shift + 1
-      val forceZero = xyMantissa === 0
-      val forceOverflow = exponent === exponent.maxValue
-      val forceNan =
-//      val
-      when(forceZero){ //TODO
-        exponent := 0
-        xySign := False
-      }
+      val mantissa = (xyMantissa |<< shift) >> 2
+//      val mantissaShifted = (xyMantissa |<< shift)
+//      val mantissa = ((xyMantissa ) >> 2) + U(xyMantissa(1))
+      val exponent = xyExponent -^ shift + 1
+      val forceZero = xyMantissa === 0 || exponent.msb || (input.rs1.isZeroOrSubnormal && input.rs2.isZeroOrSubnormal)
+      val forceOverflow = exponent(7 downto 0) === 255 ||  (input.rs1.isInfinity || input.rs2.isInfinity)
+      val forceNan = input.rs1.isNan || input.rs2.isNan || (input.rs1.isInfinity && input.rs2.isInfinity && (input.rs1.sign ^ input.rs2.sign))
    }


@ -651,7 +724,17 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
    output.rd     := input.rd
    output.value.sign := norm.xySign
    output.value.mantissa := norm.mantissa.resized
-    output.value.exponent := norm.exponent
+    output.value.exponent := norm.exponent.resized
+    output.value.special := False
+
+    when(norm.forceNan) {
+      output.value.setNanQuiet
+    } elsewhen(norm.forceZero) {
+      output.value.setZero;
+      output.value.sign := False
+    } elsewhen(norm.forceOverflow) {
+      output.value.setInfinity
+    }
  }


--- a/src/main/scala/vexriscv/ip/fpu/Interface.scala
+++ b/src/main/scala/vexriscv/ip/fpu/Interface.scala
@ -22,11 +22,20 @@ case class FpuFloatDecoded() extends Bundle{
  val isInfinity = Bool()
  val isQuiet = Bool()
 }
+
+object FpuFloat{ // Category codes stored in exponent(1 downto 0) of an FpuFloat whose `special` flag is set
+  val ZERO = 0
+  val SUBNORMAL = 1
+  val INFINITY = 2
+  val NAN = 3
+}
+
 case class FpuFloat(exponentSize: Int,
                    mantissaSize: Int) extends Bundle {
  val mantissa = UInt(mantissaSize bits)
  val exponent = UInt(exponentSize bits)
  val sign = Bool()
+  val special = Bool()

  def withInvertSign : FpuFloat ={
    val ret = FpuFloat(exponentSize,mantissaSize)
@ -37,7 +46,34 @@ case class FpuFloat(exponentSize: Int,
  }


+  def isZeroOrSubnormal =  special && exponent(1) === False
+
+  def isNormal    = !special
+  def isZero      =  special && exponent(1 downto 0) === 0
+  def isSubnormal =  special && exponent(1 downto 0) === 1
+  def isInfinity  =  special && exponent(1 downto 0) === 2
+  def isNan       =  special && exponent(1 downto 0) === 3
+  def isQuiet     =  mantissa.msb
+
+  def setNormal    =  { special := False }
+  def setZero      =  { special := True; exponent(1 downto 0) := 0 }
+  def setSubnormal =  { special := True; exponent(1 downto 0) := 1 }
+  def setInfinity  =  { special := True; exponent(1 downto 0) := 2 }
+  def setNan       =  { special := True; exponent(1 downto 0) := 3 }
+  def setNanQuiet  =  { special := True; exponent(1 downto 0) := 3; mantissa.msb := True }
+
  def decode() = {
+    val ret = FpuFloatDecoded()
+    ret.isZero      := isZero
+    ret.isSubnormal := isSubnormal
+    ret.isNormal    := isNormal
+    ret.isInfinity  := isInfinity
+    ret.isNan       := isNan
+    ret.isQuiet     := mantissa.msb
+    ret
+  }
+
+  def decodeIeee754() = {
    val ret = FpuFloatDecoded()
    val expZero = exponent === 0
    val expOne = exponent === exponent.maxValue
@ -46,7 +82,7 @@ case class FpuFloat(exponentSize: Int,
    ret.isSubnormal := expZero && !manZero
    ret.isNormal := !expOne && !expZero
    ret.isInfinity := expOne && manZero
-    ret.isNan := expOne && !manZero// && !sign
+    ret.isNan := expOne && !manZero
    ret.isQuiet := mantissa.msb
    ret
  }
--- a/src/main/scala/vexriscv/plugin/FpuPlugin.scala
+++ b/src/main/scala/vexriscv/plugin/FpuPlugin.scala
@ -174,7 +174,7 @@ class FpuPlugin(externalFpu : Boolean = false,
      port.cmd.opcode   := input(FPU_OPCODE)
      port.cmd.value    := RegNext(output(RS1))
      port.cmd.arg      := input(FPU_ARG)
-      port.cmd.rs1      := input(INSTRUCTION)(rs1Range).asUInt
+      port.cmd.rs1      := ((input(FPU_OPCODE) === FpuOpcode.STORE) ? input(INSTRUCTION)(rs2Range).asUInt | input(INSTRUCTION)(rs1Range).asUInt)
      port.cmd.rs2      := input(INSTRUCTION)(rs2Range).asUInt
      port.cmd.rs3      := input(INSTRUCTION)(rs3Range).asUInt
      port.cmd.rd       := input(INSTRUCTION)(rdRange).asUInt
--- a/src/test/scala/vexriscv/ip/fpu/FpuTest.scala
+++ b/src/test/scala/vexriscv/ip/fpu/FpuTest.scala
@ -14,6 +14,10 @@ import scala.util.Random

 class FpuTest extends FunSuite{

+  val b2f = lang.Float.intBitsToFloat(_)
+  def clamp(f : Float) = { // Reference-model helper: flushes any magnitude below the smallest normal float to zero
+    if(f.abs < b2f(0x00800000)) 0.0f*f.signum else f // 0x00800000 is the smallest normal f32; multiplying by signum yields -0.0f for negative non-zero inputs
+  }

  test("directed"){
    val portCount = 1
@ -81,8 +85,8 @@ class FpuTest extends FunSuite{
          cmdQueue += {cmd =>
            cmd.opcode #= cmd.opcode.spinalEnum.STORE
            cmd.value.randomize()
-            cmd.rs1.randomize()
-            cmd.rs2 #= rs
+            cmd.rs1 #= rs
+            cmd.rs2.randomize()
            cmd.rs3.randomize()
            cmd.rd.randomize()
            cmd.arg.randomize()
@ -92,7 +96,7 @@ class FpuTest extends FunSuite{
        }

        def storeFloat(rs : Int)(body : Float => Unit): Unit ={
-          storeRaw(rs){rsp => body(lang.Float.intBitsToFloat(rsp.value.toLong.toInt))}
+          storeRaw(rs){rsp => body(b2f(rsp.value.toLong.toInt))}
        }

        def mul(rd : Int, rs1 : Int, rs2 : Int): Unit ={
@ -304,9 +308,19 @@ class FpuTest extends FunSuite{
          }
        }
        def checkFloat(ref : Float, dut : Float): Boolean ={
-          if(ref === dut) return  true
-          ref.abs * 1.0001 > dut.abs && ref.abs * 0.9999 < dut.abs && ref.signum == dut.signum
+          if(ref.signum != dut.signum) return  false // Approximate match of dut against ref: opposite signs never match (signum is an Int, so NaN and +-0.0 all give 0)
+          if(ref.isNaN && dut.isNaN) return true // Any NaN matches any NaN; the payload is not compared
+          if(ref == dut) return true
+          if(ref.abs * 1.0001 > dut.abs && ref.abs * 0.9999 < dut.abs && ref.signum == dut.signum) return true // Otherwise accept a relative error below 0.01%
+          false
        }
+        def checkFloatExact(ref : Float, dut : Float): Boolean ={ // True only when dut equals ref exactly, or when both are NaN
+          if(ref.signum != dut.signum) return  false // Opposite signs can never be an exact match (signum is an Int, so NaN and +-0.0 all give 0)
+          if(ref.isNaN && dut.isNaN) return true // NaN != NaN under ==, so NaN pairs are accepted explicitly
+          if(ref == dut) return true
+          false
+        }
+

        def randomFloat(): Float ={
          val exp = Random.nextInt(10)-5
@ -322,7 +336,9 @@ class FpuTest extends FunSuite{

          add(rd,rs1,rs2)
          storeFloat(rd){v =>
-            val ref = a+b
+            val a_ = clamp(a)
+            val b_ = clamp(b)
+            val ref = clamp(a_ + b_)
            println(f"$a + $b = $v, $ref")
            assert(checkFloat(ref, v))
          }
@ -450,7 +466,7 @@ class FpuTest extends FunSuite{
          val rd = Random.nextInt(32)
          fmv_w_x(rd, a)
          storeFloat(rd){v =>
-            val ref = lang.Float.intBitsToFloat(a)
+            val ref = b2f(a)
            println(f"fmv_w_x $a = $v, $ref")
            assert(v === ref)
          }
@ -488,16 +504,35 @@ class FpuTest extends FunSuite{
          }
        }

-
-        val b2f = lang.Float.intBitsToFloat(_)
+        //Todo negative
+        def withMinus(that : Seq[Float]) = that.flatMap(f => List(f, -f))
+        val fZeros = withMinus(List(0.0f))
+        val fSubnormals = withMinus(List(b2f(0x00000000+1), b2f(0x00000000+2), b2f(0x00800000-2), b2f(0x00800000-1)))
+        val fExpSmall = withMinus(List(b2f(0x00800000), b2f(0x00800000+1), b2f(0x00800000 + 2)))
+        val fExpNormal = withMinus(List(b2f(0x3f800000-2), b2f(0x3f800000-1), b2f(0x3f800000), b2f(0x3f800000+1), b2f(0x3f800000+2)))
+        val fExpBig = withMinus(List(b2f(0x7f7fffff-2), b2f(0x7f7fffff-1), b2f(0x7f7fffff)))
+        val fInfinity = withMinus(List(Float.PositiveInfinity))
+        val fNan = List(Float.NaN, b2f(0x7f820000), b2f(0x7fc00000))
+        val fAll = fZeros ++ fSubnormals ++ fExpSmall ++ fExpNormal ++ fExpBig ++ fInfinity ++ fNan


+        testAdd(b2f(0x3f800000), b2f(0x3f800000-1))
+        testAdd(1.1f, 2.3f)
        testAdd(1.2f, -1.2f)
        testAdd(-1.2f, 1.2f)
        testAdd(0.0f, -1.2f)
        testAdd(-0.0f, -1.2f)
        testAdd(1.2f, -0f)
        testAdd(1.2f, 0f)
+        testAdd(1.1f, Float.MinPositiveValue)
+
+        for(a <- fAll; _ <- 0 until 50) testAdd(a, randomFloat())
+        for(b <- fAll; _ <- 0 until 50) testAdd(randomFloat(), b)
+        for(a <- fAll; b <- fAll) testAdd(a, b)
+        for(_ <- 0 until 1000) testAdd(randomFloat(), randomFloat())
+
+//        dut.clockDomain.waitSampling(10000000)
+

        testFmv_x_w(1.246f)
        testFmv_w_x(lang.Float.floatToIntBits(7.234f))
@ -590,9 +625,7 @@ class FpuTest extends FunSuite{
        testDiv(1.0f, b2f(0x3f800001))
        testDiv(1.0f, b2f(0x3f800002))

-        for(i <- 0 until 1000){
-          testAdd(randomFloat(), randomFloat())
-        }
+
        for(i <- 0 until 1000){
          testMul(randomFloat(), randomFloat())
        }
--- a/src/test/scala/vexriscv/ip/fpu/Playground.scala
+++ b/src/test/scala/vexriscv/ip/fpu/Playground.scala
@ -42,4 +42,6 @@ object MiaouNan extends App{
  println(3.0f + Float.NaN )
  println(0.0f*Float.PositiveInfinity )
  println(1.0f/0.0f )
+  println(Float.MaxValue -1 )
+  println(Float.PositiveInfinity - Float.PositiveInfinity)
 }