From 4bd637cf88875be4c1533f838955f34e2b5401bd Mon Sep 17 00:00:00 2001
From: Dolu1990 <charles.papon.90@gmail.com>
Date: Fri, 22 Jan 2021 14:55:37 +0100
Subject: [PATCH] fpu add now supports special float values and better rounding

---
 src/main/scala/vexriscv/ip/fpu/FpuCore.scala  | 135 ++++++++++++++----
 .../scala/vexriscv/ip/fpu/Interface.scala     |  38 ++++-
 .../scala/vexriscv/plugin/FpuPlugin.scala     |   2 +-
 src/test/scala/vexriscv/ip/fpu/FpuTest.scala  |  57 ++++++--
 .../scala/vexriscv/ip/fpu/Playground.scala    |   2 +
 5 files changed, 194 insertions(+), 40 deletions(-)

diff --git a/src/main/scala/vexriscv/ip/fpu/FpuCore.scala b/src/main/scala/vexriscv/ip/fpu/FpuCore.scala
index 9d39fdf..bf5749b 100644
--- a/src/main/scala/vexriscv/ip/fpu/FpuCore.scala
+++ b/src/main/scala/vexriscv/ip/fpu/FpuCore.scala
@@ -44,7 +44,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
 
   case class LoadInput() extends Bundle{
     val source = Source()
-    val rs1 = p.internalFloating()
     val rd = p.rfAddress()
     val lockId = lockIdType()
   }
@@ -175,7 +174,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
     val useRs1, useRs2, useRs3, useRd = False
     switch(s0.opcode){
       is(p.Opcode.LOAD)    {  useRd := True }
-      is(p.Opcode.STORE)   { useRs2 := True }
+      is(p.Opcode.STORE)   { useRs1 := True }
       is(p.Opcode.ADD)     { useRd  := True; useRs1 := True; useRs2 := True }
       is(p.Opcode.MUL)     { useRd  := True; useRs1 := True; useRs2 := True }
       is(p.Opcode.DIV)     { useRd  := True; useRs1 := True; useRs2 := True }
@@ -288,13 +287,39 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
     val filtred = commitFork.load.map(port => port.takeWhen(port.load))
     def feed = filtred(input.source)
     val hazard = !feed.valid
+    // Split the loaded 32-bit word (feed.value) into f32 fields, classify it, and build the internal float `recoded`
+    val f32Mantissa = feed.value(0, 23 bits).asUInt // 23 bits starting at bit 0
+    val f32Exponent = feed.value(23, 8 bits).asUInt // 8 bits starting at bit 23
+    val f32Sign     = feed.value(31) // bit 31
+
+    val expZero = f32Exponent === 0 // exponent field all zeros
+    val expOne =  f32Exponent === 255 // exponent field all ones
+    val manZero = f32Mantissa === 0
+
+    val isZero      =  expZero &&  manZero
+    val isSubnormal =  expZero && !manZero
+    val isNormal    = !expOne  && !expZero // not referenced in the visible part of this hunk
+    val isInfinity  =  expOne  &&  manZero
+    val isNan       =  expOne  && !manZero
+    val isQuiet     = f32Mantissa.msb // not referenced in the visible part of this hunk
+
+    val recoded = p.internalFloating() // NOTE(review): same decode sequence as shortPip.rawToFpu - candidate for a shared helper
+    recoded.mantissa := f32Mantissa
+    recoded.exponent := f32Exponent
+    recoded.sign     := f32Sign
+    recoded.setNormal // default classification; the when() blocks below override it (later assignments win)
+    when(isZero){recoded.setZero} // each setter sets `special` and overwrites exponent(1 downto 0) with the category tag
+    when(isSubnormal){recoded.setSubnormal}
+    when(isInfinity){recoded.setInfinity}
+    when(isNan){recoded.setNan}
+
     val output = input.haltWhen(hazard).swapPayload(WriteInput())
     filtred.foreach(_.ready := False)
     feed.ready := input.valid && output.ready
     output.source := input.source
     output.lockId := input.lockId
     output.rd := input.rd
-    output.value.assignFromBits(feed.value)
+    output.value := recoded
   }
 
   val shortPip = new Area{
@@ -303,7 +328,25 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
     val rfOutput = Stream(WriteInput())
 
     val result = p.storeLoadType().assignDontCare()
-    val storeResult = input.rs2.asBits
+
+    val recoded = CombInit(input.rs1) // copy of rs1 whose exponent/mantissa are patched below when rs1 is a special value
+    when(recoded.special){
+      switch(input.rs1.exponent(1 downto 0)){ // NOTE(review): no is(FpuFloat.SUBNORMAL) branch, so subnormals keep exponent/mantissa as stored - confirm intended
+        is(FpuFloat.ZERO){ // zero: all-zero exponent and mantissa
+          recoded.mantissa.clearAll()
+          recoded.exponent.clearAll()
+        }
+        is(FpuFloat.INFINITY){ // infinity: all-ones exponent, zero mantissa
+          recoded.mantissa.clearAll()
+          recoded.exponent.setAll()
+        }
+        is(FpuFloat.NAN){ // NaN: all-ones exponent, mantissa left untouched
+          recoded.exponent.setAll()
+        }
+      }
+    }
+
+    val recodedResult =  recoded.asBits.resize(32 bits) // driven onto `result` for the STORE and FMV_X_W opcodes
 
     val f2iShift = input.rs1.exponent - U(exponentOne)
     val f2iShifted = (U"1" @@ input.rs1.mantissa) << (f2iShift.resize(5 bits))
@@ -324,6 +367,33 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
       3 -> (!rs1AbsSmaller && !rs1Equal)
     )
 
+    val rawToFpu = new Area{ // Recodes the raw 32-bit word in input.value into the internal float format; `recoded` feeds the FMV_W_X result
+      val f32Mantissa = input.value(0, 23 bits).asUInt // 23 bits starting at bit 0
+      val f32Exponent = input.value(23, 8 bits).asUInt // 8 bits starting at bit 23
+      val f32Sign     = input.value(31) // bit 31
+
+      val expZero = f32Exponent === 0 // exponent field all zeros
+      val expOne =  f32Exponent === 255 // exponent field all ones
+      val manZero = f32Mantissa === 0
+
+      val isZero      =  expZero &&  manZero
+      val isSubnormal =  expZero && !manZero
+      val isNormal    = !expOne  && !expZero // not referenced in the visible part of this hunk
+      val isInfinity  =  expOne  &&  manZero
+      val isNan       =  expOne  && !manZero
+      val isQuiet     = f32Mantissa.msb // not referenced in the visible part of this hunk
+
+      val recoded = p.internalFloating() // NOTE(review): same decode sequence as the load stage - candidate for a shared helper
+      recoded.mantissa := f32Mantissa
+      recoded.exponent := f32Exponent
+      recoded.sign     := f32Sign
+      recoded.setNormal // default classification; the when() blocks below override it (later assignments win)
+      when(isZero){recoded.setZero} // each setter sets `special` and overwrites exponent(1 downto 0) with the category tag
+      when(isSubnormal){recoded.setSubnormal}
+      when(isInfinity){recoded.setInfinity}
+      when(isNan){recoded.setNan}
+    }
+
     val minMaxResult = (rs1Smaller ^ input.arg(0)) ? input.rs1 | input.rs2
     val cmpResult = B(rs1Smaller && !input.arg(1) || rs1Equal && !input.arg(0))
     val sgnjResult = (input.rs1.sign && input.arg(1)) ^ input.rs2.sign ^ input.arg(0)
@@ -342,10 +412,10 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
 
 
     switch(input.opcode){
-      is(FpuOpcode.STORE)   { result := storeResult }
+      is(FpuOpcode.STORE)   { result := recodedResult }
+      is(FpuOpcode.FMV_X_W) { result := recodedResult } //TODO
       is(FpuOpcode.F2I)     { result := f2iResult }
       is(FpuOpcode.CMP)     { result := cmpResult.resized } //TODO
-      is(FpuOpcode.FMV_X_W) { result := input.rs1.asBits } //TODO
       is(FpuOpcode.FCLASS)  { result := fclassResult.resized }
     }
 
@@ -361,6 +431,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
         rfOutput.value.sign := i2fSign
         rfOutput.value.exponent := i2fLog2 +^ exponentOne
         rfOutput.value.mantissa := U(i2fShifted).resized
+        rfOutput.value.special := False //TODO
       }
       is(FpuOpcode.MIN_MAX){
         rfOutput.value := minMaxResult
@@ -369,9 +440,10 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
         rfOutput.value.sign     := sgnjResult
         rfOutput.value.exponent := input.rs1.exponent
         rfOutput.value.mantissa := input.rs1.mantissa
+        rfOutput.value.special := False //TODO
       }
       is(FpuOpcode.FMV_W_X){
-        rfOutput.value.assignFromBits(input.value) //TODO
+        rfOutput.value := rawToFpu.recoded
       }
     }
 
@@ -403,6 +475,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
       output.sign := input.rs1.sign ^ input.rs2.sign
       output.exponent := exp.resized
       output.mantissa := man
+      output.special := False //TODO
     }
 
     val notMul = new Area{
@@ -423,6 +496,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
     decode.mulToAdd.rs1.mantissa := norm.output.mantissa
     decode.mulToAdd.rs1.exponent := norm.output.exponent
     decode.mulToAdd.rs1.sign := norm.output.sign
+    decode.mulToAdd.rs1.special := False //TODO
     decode.mulToAdd.rs2 := input.rs3
     decode.mulToAdd.rd := input.rd
     decode.mulToAdd.lockId := input.lockId
@@ -595,20 +669,21 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
     val input = decode.add.stage()
 
     val shifter = new Area {
-      val exp21 = input.rs2.exponent - input.rs1.exponent
-      val rs1ExponentBigger = exp21.msb
+      val exp21 = input.rs2.exponent -^ input.rs1.exponent
+      val rs1ExponentBigger = exp21.msb || input.rs2.isZeroOrSubnormal
       val rs1ExponentEqual = input.rs1.exponent === input.rs2.exponent
       val rs1MantissaBigger = input.rs1.mantissa > input.rs2.mantissa
-      val absRs1Bigger = rs1ExponentBigger|| rs1ExponentEqual && rs1MantissaBigger
+      val absRs1Bigger = ((rs1ExponentBigger || rs1ExponentEqual && rs1MantissaBigger) && !input.rs1.isZeroOrSubnormal || input.rs1.isInfinity) && !input.rs2.isInfinity
       val shiftBy = rs1ExponentBigger ? (0-exp21) | exp21
+      val passThrough = shiftBy >= p.internalMantissaSize || (input.rs1.isZeroOrSubnormal) || (input.rs2.isZeroOrSubnormal)
 
       //Note that rs1ExponentBigger can be replaced by absRs1Bigger bellow to avoid xsigned two complement in math block at expense of combinatorial path
       val xySign = absRs1Bigger ? input.rs1.sign | input.rs2.sign
       val xSign = xySign ^ (rs1ExponentBigger ? input.rs1.sign | input.rs2.sign)
       val ySign = xySign ^ (rs1ExponentBigger ? input.rs2.sign | input.rs1.sign)
-      val xMantissa = U"1" @@ (rs1ExponentBigger ? input.rs1.mantissa | input.rs2.mantissa)
-      val yMantissaUnshifted = U"1" @@ (rs1ExponentBigger ? input.rs2.mantissa | input.rs1.mantissa)
-      val yMantissa = yMantissaUnshifted >> shiftBy
+      val xMantissa = U"1" @@ (rs1ExponentBigger ? input.rs1.mantissa | input.rs2.mantissa) @@ U"0"
+      val yMantissaUnshifted = U"1" @@ (rs1ExponentBigger ? input.rs2.mantissa | input.rs1.mantissa) @@ U"0"
+      val yMantissa = yMantissaUnshifted >> (passThrough.asUInt @@ shiftBy.resize(log2Up(p.internalMantissaSize)))
       val xyExponent = rs1ExponentBigger ? input.rs1.exponent | input.rs2.exponent
     }
 
@@ -621,8 +696,9 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
       def xySign = shifter.xySign
 
       val xSigned = xMantissa.twoComplement(xSign)
-      val ySigned = yMantissa.twoComplement(ySign)
-      val xyMantissa = U(xSigned +^ ySigned).trim(1 bits)
+//      val ySigned = (yMantissa +^ (yMantissa.lsb && !ySign).asUInt).twoComplement(ySign)
+      val ySigned = ((ySign ## Mux(ySign, ~yMantissa, yMantissa)).asUInt +^ (ySign || yMantissa.lsb).asUInt).asSInt
+      val xyMantissa = U(xSigned + ySigned).trim(1 bits)
     }
 
     val norm = new Area{
@@ -632,16 +708,13 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
 
       val shiftOh = OHMasking.first(xyMantissa.asBools.reverse)
       val shift = OHToUInt(shiftOh)
-      val mantissa = (xyMantissa |<< shift) >> 1
-      val exponent = xyExponent - shift + 1
-      val forceZero = xyMantissa === 0
-      val forceOverflow = exponent === exponent.maxValue
-      val forceNan =
-//      val
-      when(forceZero){ //TODO
-        exponent := 0
-        xySign := False
-      }
+      val mantissa = (xyMantissa |<< shift) >> 2
+//      val mantissaShifted = (xyMantissa |<< shift)
+//      val mantissa = ((xyMantissa ) >> 2) + U(xyMantissa(1))
+      val exponent = xyExponent -^ shift + 1
+      val forceZero = xyMantissa === 0 || exponent.msb || (input.rs1.isZeroOrSubnormal && input.rs2.isZeroOrSubnormal)
+      val forceOverflow = exponent(7 downto 0) === 255 ||  (input.rs1.isInfinity || input.rs2.isInfinity)
+      val forceNan = input.rs1.isNan || input.rs2.isNan || (input.rs1.isInfinity && input.rs2.isInfinity && (input.rs1.sign ^ input.rs2.sign))
     }
 
 
@@ -651,7 +724,17 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
     output.rd     := input.rd
     output.value.sign := norm.xySign
     output.value.mantissa := norm.mantissa.resized
-    output.value.exponent := norm.exponent
+    output.value.exponent := norm.exponent.resized
+    output.value.special := False
+
+    when(norm.forceNan) {
+      output.value.setNanQuiet
+    } elsewhen(norm.forceZero) {
+      output.value.setZero;
+      output.value.sign := False
+    } elsewhen(norm.forceOverflow) {
+      output.value.setInfinity
+    }
   }
 
 
diff --git a/src/main/scala/vexriscv/ip/fpu/Interface.scala b/src/main/scala/vexriscv/ip/fpu/Interface.scala
index 7b4c9cf..42c9bcb 100644
--- a/src/main/scala/vexriscv/ip/fpu/Interface.scala
+++ b/src/main/scala/vexriscv/ip/fpu/Interface.scala
@@ -22,11 +22,20 @@ case class FpuFloatDecoded() extends Bundle{
   val isInfinity = Bool()
   val isQuiet = Bool()
 }
+
+object FpuFloat{ // Category tags for special values; matched against exponent(1 downto 0) of a float whose `special` flag is set
+  val ZERO = 0 // same value setZero stores in exponent(1 downto 0)
+  val SUBNORMAL = 1 // same value setSubnormal stores in exponent(1 downto 0)
+  val INFINITY = 2 // same value setInfinity stores in exponent(1 downto 0)
+  val NAN = 3 // same value setNan / setNanQuiet store in exponent(1 downto 0)
+}
+
 case class FpuFloat(exponentSize: Int,
                     mantissaSize: Int) extends Bundle {
   val mantissa = UInt(mantissaSize bits)
   val exponent = UInt(exponentSize bits)
   val sign = Bool()
+  val special = Bool()
 
   def withInvertSign : FpuFloat ={
     val ret = FpuFloat(exponentSize,mantissaSize)
@@ -37,7 +46,34 @@ case class FpuFloat(exponentSize: Int,
   }
 
 
+  def isZeroOrSubnormal =  special && exponent(1) === False // ZERO (0) and SUBNORMAL (1) are the two tags with bit 1 cleared
+  // Classification helpers: when `special` is set, exponent(1 downto 0) carries one of the FpuFloat category tags
+  def isNormal    = !special
+  def isZero      =  special && exponent(1 downto 0) === FpuFloat.ZERO
+  def isSubnormal =  special && exponent(1 downto 0) === FpuFloat.SUBNORMAL
+  def isInfinity  =  special && exponent(1 downto 0) === FpuFloat.INFINITY
+  def isNan       =  special && exponent(1 downto 0) === FpuFloat.NAN
+  def isQuiet     =  mantissa.msb
+  // Setters: flag the value as special (or not) and write the matching category tag into exponent(1 downto 0)
+  def setNormal    =  { special := False }
+  def setZero      =  { special := True; exponent(1 downto 0) := FpuFloat.ZERO }
+  def setSubnormal =  { special := True; exponent(1 downto 0) := FpuFloat.SUBNORMAL }
+  def setInfinity  =  { special := True; exponent(1 downto 0) := FpuFloat.INFINITY }
+  def setNan       =  { special := True; exponent(1 downto 0) := FpuFloat.NAN }
+  def setNanQuiet  =  { special := True; exponent(1 downto 0) := FpuFloat.NAN; mantissa.msb := True }
+
   def decode() = {
+    val ret = FpuFloatDecoded()
+    ret.isZero      := isZero
+    ret.isSubnormal := isSubnormal
+    ret.isNormal    := isNormal
+    ret.isInfinity  := isInfinity
+    ret.isNan       := isNan
+    ret.isQuiet     := mantissa.msb
+    ret
+  }
+
+  def decodeIeee754() = {
     val ret = FpuFloatDecoded()
     val expZero = exponent === 0
     val expOne = exponent === exponent.maxValue
@@ -46,7 +82,7 @@ case class FpuFloat(exponentSize: Int,
     ret.isSubnormal := expZero && !manZero
     ret.isNormal := !expOne && !expZero
     ret.isInfinity := expOne && manZero
-    ret.isNan := expOne && !manZero// && !sign
+    ret.isNan := expOne && !manZero
     ret.isQuiet := mantissa.msb
     ret
   }
diff --git a/src/main/scala/vexriscv/plugin/FpuPlugin.scala b/src/main/scala/vexriscv/plugin/FpuPlugin.scala
index 082ddd2..4d639ef 100644
--- a/src/main/scala/vexriscv/plugin/FpuPlugin.scala
+++ b/src/main/scala/vexriscv/plugin/FpuPlugin.scala
@@ -174,7 +174,7 @@ class FpuPlugin(externalFpu : Boolean = false,
       port.cmd.opcode   := input(FPU_OPCODE)
       port.cmd.value    := RegNext(output(RS1))
       port.cmd.arg      := input(FPU_ARG)
-      port.cmd.rs1      := input(INSTRUCTION)(rs1Range).asUInt
+      port.cmd.rs1      := ((input(FPU_OPCODE) === FpuOpcode.STORE) ? input(INSTRUCTION)(rs2Range).asUInt | input(INSTRUCTION)(rs1Range).asUInt)
       port.cmd.rs2      := input(INSTRUCTION)(rs2Range).asUInt
       port.cmd.rs3      := input(INSTRUCTION)(rs3Range).asUInt
       port.cmd.rd       := input(INSTRUCTION)(rdRange).asUInt
diff --git a/src/test/scala/vexriscv/ip/fpu/FpuTest.scala b/src/test/scala/vexriscv/ip/fpu/FpuTest.scala
index 3b8c78d..23dc1d3 100644
--- a/src/test/scala/vexriscv/ip/fpu/FpuTest.scala
+++ b/src/test/scala/vexriscv/ip/fpu/FpuTest.scala
@@ -14,6 +14,10 @@ import scala.util.Random
 
 class FpuTest extends FunSuite{
 
+  val b2f = lang.Float.intBitsToFloat(_) // reinterprets a raw 32-bit pattern as a Float
+  def clamp(f : Float) = { // reference-model flush: magnitudes below b2f(0x00800000) are replaced by 0.0f*f.signum
+    if(f.abs < b2f(0x00800000)) 0.0f*f.signum else f
+  }
 
   test("directed"){
     val portCount = 1
@@ -81,8 +85,8 @@ class FpuTest extends FunSuite{
           cmdQueue += {cmd =>
             cmd.opcode #= cmd.opcode.spinalEnum.STORE
             cmd.value.randomize()
-            cmd.rs1.randomize()
-            cmd.rs2 #= rs
+            cmd.rs1 #= rs
+            cmd.rs2.randomize()
             cmd.rs3.randomize()
             cmd.rd.randomize()
             cmd.arg.randomize()
@@ -92,7 +96,7 @@ class FpuTest extends FunSuite{
         }
 
         def storeFloat(rs : Int)(body : Float => Unit): Unit ={
-          storeRaw(rs){rsp => body(lang.Float.intBitsToFloat(rsp.value.toLong.toInt))}
+          storeRaw(rs){rsp => body(b2f(rsp.value.toLong.toInt))}
         }
 
         def mul(rd : Int, rs1 : Int, rs2 : Int): Unit ={
@@ -304,9 +308,19 @@ class FpuTest extends FunSuite{
           }
         }
         def checkFloat(ref : Float, dut : Float): Boolean ={
-          if(ref === dut) return  true
-          ref.abs * 1.0001 > dut.abs && ref.abs * 0.9999 < dut.abs && ref.signum == dut.signum
+          if(ref.signum != dut.signum) return  false // sign mismatch is always a failure
+          if(ref.isNaN && dut.isNaN) return true // any NaN matches any NaN
+          if(ref == dut) return true
+          if(ref.abs * 1.0001 > dut.abs && ref.abs * 0.9999 < dut.abs && ref.signum == dut.signum) return true // magnitude within +/-0.01% of the reference
+          false
         }
+        def checkFloatExact(ref : Float, dut : Float): Boolean ={ // exact match required, except that any two NaNs compare as equal
+          if(ref.signum != dut.signum) return  false // sign mismatch is always a failure
+          if(ref.isNaN && dut.isNaN) return true
+          if(ref == dut) return true
+          false
+        }
+
 
         def randomFloat(): Float ={
           val exp = Random.nextInt(10)-5
@@ -322,7 +336,9 @@ class FpuTest extends FunSuite{
 
           add(rd,rs1,rs2)
           storeFloat(rd){v =>
-            val ref = a+b
+            val a_ = clamp(a)
+            val b_ = clamp(b)
+            val ref = clamp(a_ + b_)
             println(f"$a + $b = $v, $ref")
             assert(checkFloat(ref, v))
           }
@@ -450,7 +466,7 @@ class FpuTest extends FunSuite{
           val rd = Random.nextInt(32)
           fmv_w_x(rd, a)
           storeFloat(rd){v =>
-            val ref = lang.Float.intBitsToFloat(a)
+            val ref = b2f(a)
             println(f"fmv_w_x $a = $v, $ref")
             assert(v === ref)
           }
@@ -488,16 +504,35 @@ class FpuTest extends FunSuite{
           }
         }
 
-
-        val b2f = lang.Float.intBitsToFloat(_)
+        //Todo negative
+        def withMinus(that : Seq[Float]) = that.flatMap(f => List(f, -f))
+        val fZeros = withMinus(List(0.0f))
+        val fSubnormals = withMinus(List(b2f(0x00000000+1), b2f(0x00000000+2), b2f(0x00800000-2), b2f(0x00800000-1)))
+        val fExpSmall = withMinus(List(b2f(0x00800000), b2f(0x00800000+1), b2f(0x00800000 + 2)))
+        val fExpNormal = withMinus(List(b2f(0x3f800000-2), b2f(0x3f800000-1), b2f(0x3f800000), b2f(0x3f800000+1), b2f(0x3f800000+2)))
+        val fExpBig = withMinus(List(b2f(0x7f7fffff-2), b2f(0x7f7fffff-1), b2f(0x7f7fffff)))
+        val fInfinity = withMinus(List(Float.PositiveInfinity))
+        val fNan = List(Float.NaN, b2f(0x7f820000), b2f(0x7fc00000))
+        val fAll = fZeros ++ fSubnormals ++ fExpSmall ++ fExpNormal ++ fExpBig ++ fInfinity ++ fNan
 
 
+        testAdd(b2f(0x3f800000), b2f(0x3f800000-1))
+        testAdd(1.1f, 2.3f)
         testAdd(1.2f, -1.2f)
         testAdd(-1.2f, 1.2f)
         testAdd(0.0f, -1.2f)
         testAdd(-0.0f, -1.2f)
         testAdd(1.2f, -0f)
         testAdd(1.2f, 0f)
+        testAdd(1.1f, Float.MinPositiveValue)
+
+        for(a <- fAll; _ <- 0 until 50) testAdd(a, randomFloat())
+        for(b <- fAll; _ <- 0 until 50) testAdd(randomFloat(), b)
+        for(a <- fAll; b <- fAll) testAdd(a, b)
+        for(_ <- 0 until 1000) testAdd(randomFloat(), randomFloat())
+
+//        dut.clockDomain.waitSampling(10000000)
+
 
         testFmv_x_w(1.246f)
         testFmv_w_x(lang.Float.floatToIntBits(7.234f))
@@ -590,9 +625,7 @@ class FpuTest extends FunSuite{
         testDiv(1.0f, b2f(0x3f800001))
         testDiv(1.0f, b2f(0x3f800002))
 
-        for(i <- 0 until 1000){
-          testAdd(randomFloat(), randomFloat())
-        }
+
         for(i <- 0 until 1000){
           testMul(randomFloat(), randomFloat())
         }
diff --git a/src/test/scala/vexriscv/ip/fpu/Playground.scala b/src/test/scala/vexriscv/ip/fpu/Playground.scala
index f5df144..a155210 100644
--- a/src/test/scala/vexriscv/ip/fpu/Playground.scala
+++ b/src/test/scala/vexriscv/ip/fpu/Playground.scala
@@ -42,4 +42,6 @@ object MiaouNan extends App{
   println(3.0f + Float.NaN )
   println(0.0f*Float.PositiveInfinity )
   println(1.0f/0.0f )
+  println(Float.MaxValue -1 )
+  println(Float.PositiveInfinity - Float.PositiveInfinity)
 }
\ No newline at end of file