From 4bd637cf88875be4c1533f838955f34e2b5401bd Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Fri, 22 Jan 2021 14:55:37 +0100 Subject: [PATCH] fpu add now support special floats values and better rounding --- src/main/scala/vexriscv/ip/fpu/FpuCore.scala | 135 ++++++++++++++---- .../scala/vexriscv/ip/fpu/Interface.scala | 38 ++++- .../scala/vexriscv/plugin/FpuPlugin.scala | 2 +- src/test/scala/vexriscv/ip/fpu/FpuTest.scala | 57 ++++++-- .../scala/vexriscv/ip/fpu/Playground.scala | 2 + 5 files changed, 194 insertions(+), 40 deletions(-) diff --git a/src/main/scala/vexriscv/ip/fpu/FpuCore.scala b/src/main/scala/vexriscv/ip/fpu/FpuCore.scala index 9d39fdf..bf5749b 100644 --- a/src/main/scala/vexriscv/ip/fpu/FpuCore.scala +++ b/src/main/scala/vexriscv/ip/fpu/FpuCore.scala @@ -44,7 +44,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ case class LoadInput() extends Bundle{ val source = Source() - val rs1 = p.internalFloating() val rd = p.rfAddress() val lockId = lockIdType() } @@ -175,7 +174,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ val useRs1, useRs2, useRs3, useRd = False switch(s0.opcode){ is(p.Opcode.LOAD) { useRd := True } - is(p.Opcode.STORE) { useRs2 := True } + is(p.Opcode.STORE) { useRs1 := True } is(p.Opcode.ADD) { useRd := True; useRs1 := True; useRs2 := True } is(p.Opcode.MUL) { useRd := True; useRs1 := True; useRs2 := True } is(p.Opcode.DIV) { useRd := True; useRs1 := True; useRs2 := True } @@ -288,13 +287,39 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ val filtred = commitFork.load.map(port => port.takeWhen(port.load)) def feed = filtred(input.source) val hazard = !feed.valid + + val f32Mantissa = feed.value(0, 23 bits).asUInt + val f32Exponent = feed.value(23, 8 bits).asUInt + val f32Sign = feed.value(31) + + val expZero = f32Exponent === 0 + val expOne = f32Exponent === 255 + val manZero = f32Mantissa === 0 + + val isZero = expZero && manZero + val isSubnormal = expZero && !manZero + val isNormal = !expOne && !expZero + val isInfinity = expOne && manZero + val isNan = expOne && !manZero + val isQuiet = f32Mantissa.msb + + val recoded = p.internalFloating() + recoded.mantissa := f32Mantissa + recoded.exponent := f32Exponent + recoded.sign := f32Sign + recoded.setNormal + when(isZero){recoded.setZero} + when(isSubnormal){recoded.setSubnormal} + when(isInfinity){recoded.setInfinity} + when(isNan){recoded.setNan} + val output = input.haltWhen(hazard).swapPayload(WriteInput()) filtred.foreach(_.ready := False) feed.ready := input.valid && output.ready output.source := input.source output.lockId := input.lockId output.rd := input.rd - output.value.assignFromBits(feed.value) + output.value := recoded } val shortPip = new Area{ @@ -303,7 +328,25 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ val rfOutput = Stream(WriteInput()) val result = p.storeLoadType().assignDontCare() - val storeResult = input.rs2.asBits + + val recoded = CombInit(input.rs1) + when(recoded.special){ + switch(input.rs1.exponent(1 downto 0)){ + is(FpuFloat.ZERO){ + recoded.mantissa.clearAll() + recoded.exponent.clearAll() + } + is(FpuFloat.INFINITY){ + recoded.mantissa.clearAll() + recoded.exponent.setAll() + } + is(FpuFloat.NAN){ + recoded.exponent.setAll() + } + } + } + + val recodedResult = recoded.asBits.resize(32 bits) val f2iShift = input.rs1.exponent - U(exponentOne) val f2iShifted = (U"1" @@ input.rs1.mantissa) << (f2iShift.resize(5 bits)) @@ -324,6 +367,33 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ 3 -> (!rs1AbsSmaller && !rs1Equal) ) + val rawToFpu = new Area{ + val f32Mantissa = input.value(0, 23 bits).asUInt + val f32Exponent = input.value(23, 8 bits).asUInt + val f32Sign = input.value(31) + + val expZero = f32Exponent === 0 + val expOne = f32Exponent === 255 + val manZero = f32Mantissa === 0 + + val isZero = expZero && manZero + val isSubnormal = expZero && !manZero + val isNormal = !expOne && !expZero + val isInfinity = expOne && manZero + val isNan = expOne && !manZero + val isQuiet = f32Mantissa.msb + + val recoded = p.internalFloating() + recoded.mantissa := f32Mantissa + recoded.exponent := f32Exponent + recoded.sign := f32Sign + recoded.setNormal + when(isZero){recoded.setZero} + when(isSubnormal){recoded.setSubnormal} + when(isInfinity){recoded.setInfinity} + when(isNan){recoded.setNan} + } + val minMaxResult = (rs1Smaller ^ input.arg(0)) ? input.rs1 | input.rs2 val cmpResult = B(rs1Smaller && !input.arg(1) || rs1Equal && !input.arg(0)) val sgnjResult = (input.rs1.sign && input.arg(1)) ^ input.rs2.sign ^ input.arg(0) @@ -342,10 +412,10 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ switch(input.opcode){ - is(FpuOpcode.STORE) { result := storeResult } + is(FpuOpcode.STORE) { result := recodedResult } + is(FpuOpcode.FMV_X_W) { result := recodedResult } //TODO is(FpuOpcode.F2I) { result := f2iResult } is(FpuOpcode.CMP) { result := cmpResult.resized } //TODO - is(FpuOpcode.FMV_X_W) { result := input.rs1.asBits } //TODO is(FpuOpcode.FCLASS) { result := fclassResult.resized } } @@ -361,6 +431,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ rfOutput.value.sign := i2fSign rfOutput.value.exponent := i2fLog2 +^ exponentOne rfOutput.value.mantissa := U(i2fShifted).resized + rfOutput.value.special := False //TODO } is(FpuOpcode.MIN_MAX){ rfOutput.value := minMaxResult @@ -369,9 +440,10 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ rfOutput.value.sign := sgnjResult rfOutput.value.exponent := input.rs1.exponent rfOutput.value.mantissa := input.rs1.mantissa + rfOutput.value.special := False //TODO } is(FpuOpcode.FMV_W_X){ - rfOutput.value.assignFromBits(input.value) //TODO + rfOutput.value := rawToFpu.recoded } } @@ -403,6 +475,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ output.sign := input.rs1.sign ^ input.rs2.sign output.exponent := exp.resized output.mantissa := man + output.special := False //TODO } val notMul = new Area{ @@ -423,6 +496,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ decode.mulToAdd.rs1.mantissa := norm.output.mantissa decode.mulToAdd.rs1.exponent := norm.output.exponent decode.mulToAdd.rs1.sign := norm.output.sign + decode.mulToAdd.rs1.special := False //TODO decode.mulToAdd.rs2 := input.rs3 decode.mulToAdd.rd := input.rd decode.mulToAdd.lockId := input.lockId @@ -595,20 +669,21 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ val input = decode.add.stage() val shifter = new Area { - val exp21 = input.rs2.exponent - input.rs1.exponent - val rs1ExponentBigger = exp21.msb + val exp21 = input.rs2.exponent -^ input.rs1.exponent + val rs1ExponentBigger = exp21.msb || input.rs2.isZeroOrSubnormal val rs1ExponentEqual = input.rs1.exponent === input.rs2.exponent val rs1MantissaBigger = input.rs1.mantissa > input.rs2.mantissa - val absRs1Bigger = rs1ExponentBigger|| rs1ExponentEqual && rs1MantissaBigger + val absRs1Bigger = ((rs1ExponentBigger || rs1ExponentEqual && rs1MantissaBigger) && !input.rs1.isZeroOrSubnormal || input.rs1.isInfinity) && !input.rs2.isInfinity val shiftBy = rs1ExponentBigger ? (0-exp21) | exp21 + val passThrough = shiftBy >= p.internalMantissaSize || (input.rs1.isZeroOrSubnormal) || (input.rs2.isZeroOrSubnormal) //Note that rs1ExponentBigger can be replaced by absRs1Bigger bellow to avoid xsigned two complement in math block at expense of combinatorial path val xySign = absRs1Bigger ? input.rs1.sign | input.rs2.sign val xSign = xySign ^ (rs1ExponentBigger ? input.rs1.sign | input.rs2.sign) val ySign = xySign ^ (rs1ExponentBigger ? input.rs2.sign | input.rs1.sign) - val xMantissa = U"1" @@ (rs1ExponentBigger ? input.rs1.mantissa | input.rs2.mantissa) - val yMantissaUnshifted = U"1" @@ (rs1ExponentBigger ? input.rs2.mantissa | input.rs1.mantissa) - val yMantissa = yMantissaUnshifted >> shiftBy + val xMantissa = U"1" @@ (rs1ExponentBigger ? input.rs1.mantissa | input.rs2.mantissa) @@ U"0" + val yMantissaUnshifted = U"1" @@ (rs1ExponentBigger ? input.rs2.mantissa | input.rs1.mantissa) @@ U"0" + val yMantissa = yMantissaUnshifted >> (passThrough.asUInt @@ shiftBy.resize(log2Up(p.internalMantissaSize))) val xyExponent = rs1ExponentBigger ? input.rs1.exponent | input.rs2.exponent } @@ -621,8 +696,9 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ def xySign = shifter.xySign val xSigned = xMantissa.twoComplement(xSign) - val ySigned = yMantissa.twoComplement(ySign) - val xyMantissa = U(xSigned +^ ySigned).trim(1 bits) +// val ySigned = (yMantissa +^ (yMantissa.lsb && !ySign).asUInt).twoComplement(ySign) + val ySigned = ((ySign ## Mux(ySign, ~yMantissa, yMantissa)).asUInt +^ (ySign || yMantissa.lsb).asUInt).asSInt + val xyMantissa = U(xSigned + ySigned).trim(1 bits) } val norm = new Area{ @@ -632,16 +708,13 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ val shiftOh = OHMasking.first(xyMantissa.asBools.reverse) val shift = OHToUInt(shiftOh) - val mantissa = (xyMantissa |<< shift) >> 1 - val exponent = xyExponent - shift + 1 - val forceZero = xyMantissa === 0 - val forceOverflow = exponent === exponent.maxValue - val forceNan = -// val - when(forceZero){ //TODO - exponent := 0 - xySign := False - } + val mantissa = (xyMantissa |<< shift) >> 2 +// val mantissaShifted = (xyMantissa |<< shift) +// val mantissa = ((xyMantissa ) >> 2) + U(xyMantissa(1)) + val exponent = xyExponent -^ shift + 1 + val forceZero = xyMantissa === 0 || exponent.msb || (input.rs1.isZeroOrSubnormal && input.rs2.isZeroOrSubnormal) + val forceOverflow = exponent(7 downto 0) === 255 || (input.rs1.isInfinity || input.rs2.isInfinity) + val forceNan = input.rs1.isNan || input.rs2.isNan || (input.rs1.isInfinity && input.rs2.isInfinity && (input.rs1.sign ^ input.rs2.sign)) } @@ -651,7 +724,17 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ output.rd := input.rd output.value.sign := norm.xySign output.value.mantissa := norm.mantissa.resized - output.value.exponent := norm.exponent + output.value.exponent := norm.exponent.resized + output.value.special := False + + when(norm.forceNan) { + output.value.setNanQuiet + } elsewhen(norm.forceZero) { + output.value.setZero; + output.value.sign := False + } elsewhen(norm.forceOverflow) { + output.value.setInfinity + } } diff --git a/src/main/scala/vexriscv/ip/fpu/Interface.scala b/src/main/scala/vexriscv/ip/fpu/Interface.scala index 7b4c9cf..42c9bcb 100644 --- a/src/main/scala/vexriscv/ip/fpu/Interface.scala +++ b/src/main/scala/vexriscv/ip/fpu/Interface.scala @@ -22,11 +22,20 @@ case class FpuFloatDecoded() extends Bundle{ val isInfinity = Bool() val isQuiet = Bool() } + +object FpuFloat{ + val ZERO = 0 + val SUBNORMAL = 1 + val INFINITY = 2 + val NAN = 3 +} + case class FpuFloat(exponentSize: Int, mantissaSize: Int) extends Bundle { val mantissa = UInt(mantissaSize bits) val exponent = UInt(exponentSize bits) val sign = Bool() + val special = Bool() def withInvertSign : FpuFloat ={ val ret = FpuFloat(exponentSize,mantissaSize) @@ -37,7 +46,34 @@ case class FpuFloat(exponentSize: Int, } + def isZeroOrSubnormal = special && exponent(1) === False + + def isNormal = !special + def isZero = special && exponent(1 downto 0) === 0 + def isSubnormal = special && exponent(1 downto 0) === 1 + def isInfinity = special && exponent(1 downto 0) === 2 + def isNan = special && exponent(1 downto 0) === 3 + def isQuiet = mantissa.msb + + def setNormal = { special := False } + def setZero = { special := True; exponent(1 downto 0) := 0 } + def setSubnormal = { special := True; exponent(1 downto 0) := 1 } + def setInfinity = { special := True; exponent(1 downto 0) := 2 } + def setNan = { special := True; exponent(1 downto 0) := 3 } + def setNanQuiet = { special := True; exponent(1 downto 0) := 3; mantissa.msb := True } + def decode() = { + val ret = FpuFloatDecoded() + ret.isZero := isZero + ret.isSubnormal := isSubnormal + ret.isNormal := isNormal + ret.isInfinity := isInfinity + ret.isNan := isNan + ret.isQuiet := mantissa.msb + ret + } + + def decodeIeee754() = { val ret = FpuFloatDecoded() val expZero = exponent === 0 val expOne = exponent === exponent.maxValue @@ -46,7 +82,7 @@ case class FpuFloat(exponentSize: Int, ret.isSubnormal := expZero && !manZero ret.isNormal := !expOne && !expZero ret.isInfinity := expOne && manZero - ret.isNan := expOne && !manZero// && !sign + ret.isNan := expOne && !manZero ret.isQuiet := mantissa.msb ret } diff --git a/src/main/scala/vexriscv/plugin/FpuPlugin.scala b/src/main/scala/vexriscv/plugin/FpuPlugin.scala index 082ddd2..4d639ef 100644 --- a/src/main/scala/vexriscv/plugin/FpuPlugin.scala +++ b/src/main/scala/vexriscv/plugin/FpuPlugin.scala @@ -174,7 +174,7 @@ class FpuPlugin(externalFpu : Boolean = false, port.cmd.opcode := input(FPU_OPCODE) port.cmd.value := RegNext(output(RS1)) port.cmd.arg := input(FPU_ARG) - port.cmd.rs1 := input(INSTRUCTION)(rs1Range).asUInt + port.cmd.rs1 := ((input(FPU_OPCODE) === FpuOpcode.STORE) ? input(INSTRUCTION)(rs2Range).asUInt | input(INSTRUCTION)(rs1Range).asUInt) port.cmd.rs2 := input(INSTRUCTION)(rs2Range).asUInt port.cmd.rs3 := input(INSTRUCTION)(rs3Range).asUInt port.cmd.rd := input(INSTRUCTION)(rdRange).asUInt diff --git a/src/test/scala/vexriscv/ip/fpu/FpuTest.scala b/src/test/scala/vexriscv/ip/fpu/FpuTest.scala index 3b8c78d..23dc1d3 100644 --- a/src/test/scala/vexriscv/ip/fpu/FpuTest.scala +++ b/src/test/scala/vexriscv/ip/fpu/FpuTest.scala @@ -14,6 +14,10 @@ import scala.util.Random class FpuTest extends FunSuite{ + val b2f = lang.Float.intBitsToFloat(_) + def clamp(f : Float) = { + if(f.abs < b2f(0x00800000)) 0.0f*f.signum else f + } test("directed"){ val portCount = 1 @@ -81,8 +85,8 @@ class FpuTest extends FunSuite{ cmdQueue += {cmd => cmd.opcode #= cmd.opcode.spinalEnum.STORE cmd.value.randomize() - cmd.rs1.randomize() - cmd.rs2 #= rs + cmd.rs1 #= rs + cmd.rs2.randomize() cmd.rs3.randomize() cmd.rd.randomize() cmd.arg.randomize() @@ -92,7 +96,7 @@ class FpuTest extends FunSuite{ } def storeFloat(rs : Int)(body : Float => Unit): Unit ={ - storeRaw(rs){rsp => body(lang.Float.intBitsToFloat(rsp.value.toLong.toInt))} + storeRaw(rs){rsp => body(b2f(rsp.value.toLong.toInt))} } def mul(rd : Int, rs1 : Int, rs2 : Int): Unit ={ @@ -304,9 +308,19 @@ class FpuTest extends FunSuite{ } } def checkFloat(ref : Float, dut : Float): Boolean ={ - if(ref === dut) return true - ref.abs * 1.0001 > dut.abs && ref.abs * 0.9999 < dut.abs && ref.signum == dut.signum + if(ref.signum != dut.signum === dut) return false + if(ref.isNaN && dut.isNaN) return true + if(ref == dut) return true + if(ref.abs * 1.0001 > dut.abs && ref.abs * 0.9999 < dut.abs && ref.signum == dut.signum) return true + false } + def checkFloatExact(ref : Float, dut : Float): Boolean ={ + if(ref.signum != dut.signum === dut) return false + if(ref.isNaN && dut.isNaN) return true + if(ref == dut) return true + false + } + def randomFloat(): Float ={ val exp = Random.nextInt(10)-5 @@ -322,7 +336,9 @@ class FpuTest extends FunSuite{ add(rd,rs1,rs2) storeFloat(rd){v => - val ref = a+b + val a_ = clamp(a) + val b_ = clamp(b) + val ref = clamp(a_ + b_) println(f"$a + $b = $v, $ref") assert(checkFloat(ref, v)) } @@ -450,7 +466,7 @@ class FpuTest extends FunSuite{ val rd = Random.nextInt(32) fmv_w_x(rd, a) storeFloat(rd){v => - val ref = lang.Float.intBitsToFloat(a) + val ref = b2f(a) println(f"fmv_w_x $a = $v, $ref") assert(v === ref) } @@ -488,16 +504,35 @@ class FpuTest extends FunSuite{ } } - - val b2f = lang.Float.intBitsToFloat(_) + //Todo negative + def withMinus(that : Seq[Float]) = that.flatMap(f => List(f, -f)) + val fZeros = withMinus(List(0.0f)) + val fSubnormals = withMinus(List(b2f(0x00000000+1), b2f(0x00000000+2), b2f(0x00800000-2), b2f(0x00800000-1))) + val fExpSmall = withMinus(List(b2f(0x00800000), b2f(0x00800000+1), b2f(0x00800000 + 2))) + val fExpNormal = withMinus(List(b2f(0x3f800000-2), b2f(0x3f800000-1), b2f(0x3f800000), b2f(0x3f800000+1), b2f(0x3f800000+2))) + val fExpBig = withMinus(List(b2f(0x7f7fffff-2), b2f(0x7f7fffff-1), b2f(0x7f7fffff))) + val fInfinity = withMinus(List(Float.PositiveInfinity)) + val fNan = List(Float.NaN, b2f(0x7f820000), b2f(0x7fc00000)) + val fAll = fZeros ++ fSubnormals ++ fExpSmall ++ fExpNormal ++ fExpBig ++ fInfinity ++ fNan + testAdd(b2f(0x3f800000), b2f(0x3f800000-1)) + testAdd(1.1f, 2.3f) testAdd(1.2f, -1.2f) testAdd(-1.2f, 1.2f) testAdd(0.0f, -1.2f) testAdd(-0.0f, -1.2f) testAdd(1.2f, -0f) testAdd(1.2f, 0f) + testAdd(1.1f, Float.MinPositiveValue) + + for(a <- fAll; _ <- 0 until 50) testAdd(a, randomFloat()) + for(b <- fAll; _ <- 0 until 50) testAdd(randomFloat(), b) + for(a <- fAll; b <- fAll) testAdd(a, b) + for(_ <- 0 until 1000) testAdd(randomFloat(), randomFloat()) + +// dut.clockDomain.waitSampling(10000000) + testFmv_x_w(1.246f) testFmv_w_x(lang.Float.floatToIntBits(7.234f)) @@ -590,9 +625,7 @@ class FpuTest extends FunSuite{ testDiv(1.0f, b2f(0x3f800001)) testDiv(1.0f, b2f(0x3f800002)) - for(i <- 0 until 1000){ - testAdd(randomFloat(), randomFloat()) - } + for(i <- 0 until 1000){ testMul(randomFloat(), randomFloat()) } diff --git a/src/test/scala/vexriscv/ip/fpu/Playground.scala b/src/test/scala/vexriscv/ip/fpu/Playground.scala index f5df144..a155210 100644 --- a/src/test/scala/vexriscv/ip/fpu/Playground.scala +++ b/src/test/scala/vexriscv/ip/fpu/Playground.scala @@ -42,4 +42,6 @@ object MiaouNan extends App{ println(3.0f + Float.NaN ) println(0.0f*Float.PositiveInfinity ) println(1.0f/0.0f ) + println(Float.MaxValue -1 ) + println(Float.PositiveInfinity - Float.PositiveInfinity) } \ No newline at end of file