From b6eda1ad7a1e7611e9221192f6ea79274c22a22b Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Thu, 11 Feb 2021 16:07:47 +0100 Subject: [PATCH] fpu f64 load/store/mv/mul seems ok --- src/main/scala/vexriscv/ip/fpu/FpuCore.scala | 112 ++++-- src/test/scala/vexriscv/ip/fpu/FpuTest.scala | 400 +++++++++++++------ 2 files changed, 346 insertions(+), 166 deletions(-) diff --git a/src/main/scala/vexriscv/ip/fpu/FpuCore.scala b/src/main/scala/vexriscv/ip/fpu/FpuCore.scala index dd4352e..edb6dba 100644 --- a/src/main/scala/vexriscv/ip/fpu/FpuCore.scala +++ b/src/main/scala/vexriscv/ip/fpu/FpuCore.scala @@ -21,6 +21,8 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ val exponentOne = (1 << p.internalExponentSize-1) - 1 val exponentF32Subnormal = exponentOne-127 val exponentF64Subnormal = exponentOne-1023 + val exponentF32Infinity = exponentOne+127+1 + val exponentF64Infinity = exponentOne+1023+1 val rfLockCount = 5 val lockIdType = HardType(UInt(log2Up(rfLockCount) bits)) @@ -30,6 +32,11 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ if(!p.withDouble) no } + def muxDouble[T <: Data](format : FpuFormat.C)(yes : => T)(no : => T): T ={ + if(p.withDouble) ((format === FpuFormat.DOUBLE) ? { yes } | { no }) + else no + } + case class RfReadInput() extends Bundle{ val source = Source() val opcode = p.Opcode() @@ -254,11 +261,16 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ output.rs3 := rs3Entry.value if(p.withDouble){ output.format := s1.format - when(s1.format === FpuFormat.FLOAT =/= rs1Entry.boxed){ + val store = s1.opcode === FpuOpcode.STORE ||s1.opcode === FpuOpcode.FMV_X_W + when(store){ //Pass through + output.format := rs1Entry.boxed ? 
FpuFormat.FLOAT | FpuFormat.DOUBLE + } elsewhen(s1.format === FpuFormat.FLOAT =/= rs1Entry.boxed){ output.rs1.setNanQuiet + output.rs1.sign := False } when(s1.format === FpuFormat.FLOAT =/= rs2Entry.boxed){ output.rs2.setNanQuiet + output.rs2.sign := False } when(s1.format === FpuFormat.FLOAT =/= rs3Entry.boxed){ output.rs3.setNanQuiet @@ -364,7 +376,13 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ output.i2f := input.i2f output.arg := input.arg output.roundMode := input.roundMode - if(p.withDouble) output.format := input.format + if(p.withDouble) { + output.format := input.format + when(!input.i2f && input.format === FpuFormat.DOUBLE && output.value(63 downto 32).andR){ //Detect boxing + output.format := FpuFormat.FLOAT + } + } + } val s1 = new Area{ @@ -378,25 +396,34 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ } val f64 = p.withDouble generate new Area{ val mantissa = input.value(0, 52 bits).asUInt - val exponent = input.value(11, 52 bits).asUInt + val exponent = input.value(52, 11 bits).asUInt val sign = input.value(63) } + val recodedExpOffset = UInt(p.internalExponentSize bits) val passThroughFloat = p.internalFloating() passThroughFloat.special := False - passThroughFloat.sign := f32.sign - passThroughFloat.exponent := f32.exponent.resized - passThroughFloat.mantissa := f32.mantissa << (if(p.withDouble) 29 else 0) - if(p.withDouble) when(input.format === FpuFormat.DOUBLE){ + + whenDouble(input.format){ passThroughFloat.sign := f64.sign passThroughFloat.exponent := f64.exponent.resized passThroughFloat.mantissa := f64.mantissa + recodedExpOffset := exponentF64Subnormal + } { + passThroughFloat.sign := f32.sign + passThroughFloat.exponent := f32.exponent.resized + passThroughFloat.mantissa := f32.mantissa << (if (p.withDouble) 29 else 0) + recodedExpOffset := exponentF32Subnormal } + val manZero = passThroughFloat.mantissa === 0 val expZero = passThroughFloat.exponent === 0 val expOne = 
passThroughFloat.exponent(7 downto 0).andR - if(p.withDouble) expOne.clearWhen(input.format === FpuFormat.DOUBLE && !passThroughFloat.exponent(11 downto 8).andR) + if(p.withDouble) { + expZero.clearWhen(input.format === FpuFormat.DOUBLE && input.value(62 downto 60) =/= 0) + expOne.clearWhen(input.format === FpuFormat.DOUBLE && input.value(62 downto 60) =/= 7) + } val isZero = expZero && manZero val isSubnormal = expZero && !manZero @@ -409,9 +436,10 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ val ohInputWidth = 32 max p.internalMantissaSize val ohInput = Bits(ohInputWidth bits).assignDontCare() when(!input.i2f) { - if(!p.withDouble) ohInput(ohInputWidth-23, 23 bits) := input.value(0, 23 bits) + if(!p.withDouble) ohInput := input.value(0, 23 bits) << 9 if( p.withDouble) ohInput := passThroughFloat.mantissa.asBits } otherwise { + ohInput(ohInputWidth-32-1 downto 0) := 0 ohInput(ohInputWidth-32, 32 bits) := input.value(31 downto 0) } @@ -426,15 +454,15 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ } val output = RegNextWhen(logic, !done) } - shift.input := (input.value.asUInt |<< 1).resized + shift.input := (ohInput.asUInt |<< 1).resized - val subnormalShiftOffset = if(!p.withDouble) U(9) else ((input.format === FpuFormat.DOUBLE) ? U(0) | U(0)) - val subnormalExpOffset = if(!p.withDouble) U(9) else ((input.format === FpuFormat.DOUBLE) ? U(0) | U(0)) + val subnormalShiftOffset = if(!p.withDouble) U(0) else ((input.format === FpuFormat.DOUBLE) ? U(0) | U(0)) //TODO remove ? + val subnormalExpOffset = if(!p.withDouble) U(0) else ((input.format === FpuFormat.DOUBLE) ? 
U(0) | U(0)) when(input.valid && (input.i2f || isSubnormal) && !done){ busy := True when(boot){ - when(input.i2f && !patched && input.value.msb && input.arg(0)){ + when(input.i2f && !patched && input.value(31) && input.arg(0)){ input.value.getDrivingReg(0, 32 bits) := B(input.value.asUInt.twoComplement(True).resize(32 bits)) patched := True } otherwise { @@ -467,7 +495,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ val recoded = p.internalFloating() recoded.mantissa := passThroughFloat.mantissa - recoded.exponent := (passThroughFloat.exponent -^ fsm.expOffset + exponentF32Subnormal).resized + recoded.exponent := (passThroughFloat.exponent -^ fsm.expOffset + recodedExpOffset).resized recoded.sign := passThroughFloat.sign recoded.setNormal when(isZero){recoded.setZero} @@ -480,9 +508,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ output.roundMode := input.roundMode if(p.withDouble) { output.format := input.format - when(!input.i2f && input.format === FpuFormat.DOUBLE && input.value(63 downto 23).andR){ //Detect boxing - output.format := FpuFormat.FLOAT - } } output.rd := input.rd output.value.sign := recoded.sign @@ -523,9 +548,15 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ val exp = (input.rs1.exponent - (exponentOne-1023)).resize(11 bits) val man = CombInit(input.rs1.mantissa) } - recodedResult := (if(p.withDouble) B"xFFFFFFFF" else B"") ## input.rs1.sign ## f32.exp ## f32.man - val expInSubnormalRange = input.rs1.exponent <= exponentOne - 127 + whenDouble(input.format){ + recodedResult := input.rs1.sign ## f64.exp ## f64.man + } { + recodedResult := (if(p.withDouble) B"xFFFFFFFF" else B"") ## input.rs1.sign ## f32.exp ## f32.man + } + + val expSubnormalThreshold = muxDouble[UInt](input.format)(exponentF64Subnormal)(exponentF32Subnormal) + val expInSubnormalRange = input.rs1.exponent <= expSubnormalThreshold val isSubnormal = !input.rs1.special && expInSubnormalRange 
val isNormal = !input.rs1.special && !expInSubnormalRange val fsm = new Area{ @@ -552,14 +583,14 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ shift.input := (U(!isZero) @@ input.rs1.mantissa) << (if(p.withDouble) 0 else 9) - + val formatShiftOffset = muxDouble[UInt](input.format)(exponentOne-1023+1)(exponentOne - (if(p.withDouble) (127+34) else (127-10))) when(input.valid && (needRecoding || isF2i) && !done){ halt := True when(boot){ when(isF2i){ - shift.by := (U(exponentOne + 31) - input.rs1.exponent).min(U(33)).resized //TODO merge + shift.by := ((U(exponentOne + 31) - input.rs1.exponent).min(U(33)) + (if(p.withDouble) 20 else 0)).resized //TODO merge } otherwise { - shift.by := (U(exponentOne - 127+10) - input.rs1.exponent).resized + shift.by := (formatShiftOffset - input.rs1.exponent).resized } boot := False } otherwise { @@ -619,7 +650,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ when(mantissaForced){ recodedResult(0,23 bits) := (default -> mantissaForcedValue) whenDouble(input.format){ - recodedResult(52-23, 52-23 bits) := (default -> exponentForcedValue) + recodedResult(23, 52-23 bits) := (default -> mantissaForcedValue) }{} } when(exponentForced){ @@ -764,10 +795,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ } val norm = new Area{ -// val needShift = math.mulC.msb -// val exp = math.exp + U(needShift) -// val man = needShift ? math.mulC(p.internalMantissaSize + 1, p.internalMantissaSize bits) | math.mulC(p.internalMantissaSize, p.internalMantissaSize bits) - val (mulHigh, mulLow) = math.mulC.splitAt(p.internalMantissaSize-1) val scrap = mulLow =/= 0 val needShift = mulHigh.msb @@ -775,7 +802,9 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ val man = needShift ? 
mulHigh(1, p.internalMantissaSize+1 bits) | mulHigh(0, p.internalMantissaSize+1 bits) scrap setWhen(needShift && mulHigh(0)) val forceZero = input.rs1.isZero || input.rs2.isZero - val forceUnderflow = exp < exponentOne + exponentOne - 127 - 24 // 0x6A //TODO + val underflowThreshold = muxDouble[UInt](input.format)(exponentOne + exponentOne - 1023 - 53) (exponentOne + exponentOne - 127 - 24) + val underflowExp = muxDouble[UInt](input.format)(exponentOne - 1023 - 54) (exponentOne - 127 - 25) + val forceUnderflow = exp < underflowThreshold val forceOverflow = input.rs1.isInfinity || input.rs2.isInfinity val infinitynan = ((input.rs1.isInfinity || input.rs2.isInfinity) && (input.rs1.isZero || input.rs2.isZero)) val forceNan = input.rs1.isNan || input.rs2.isNan || infinitynan @@ -797,7 +826,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ } elsewhen(forceZero) { output.setZero } elsewhen(forceUnderflow) { - output.exponent := exponentOne - 127 - 25 + output.exponent := underflowExp.resized } } @@ -1123,11 +1152,14 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ val round = new Area{ val input = merge.commited.combStage - //TODO do not break NAN payload (seems already fine) val manAggregate = input.value.mantissa @@ input.scrap - val expDif = (exponentOne-126) -^ input.value.exponent + val expBase = muxDouble[UInt](input.format)(exponentF64Subnormal+1)(exponentF32Subnormal+1) + val expDif = expBase -^ input.value.exponent val expSubnormal = !expDif.msb - val discardCount = expSubnormal ? expDif.resize(log2Up(p.internalMantissaSize) bits) | U(0) + var discardCount = (expSubnormal ? 
expDif.resize(log2Up(p.internalMantissaSize) bits) | U(0)) + if(p.withDouble) when(input.format === FpuFormat.FLOAT){ + discardCount \= discardCount + 29 + } val exactMask = (List(True) ++ (0 until p.internalMantissaSize+1).map(_ < discardCount)).asBits.asUInt val roundAdjusted = (True ## (manAggregate>>1))(discardCount) ## ((manAggregate & exactMask) =/= 0) @@ -1156,10 +1188,16 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ // uf := True // } - when(!math.special && math.exponent <= exponentOne-127 && roundAdjusted.asUInt =/= 0){ //Do not catch exact 1.17549435E-38 underflow, but, who realy care ? + + + val ufSubnormalThreshold = muxDouble[UInt](input.format)(exponentF64Subnormal)(exponentF32Subnormal) + val ufThreshold = muxDouble[UInt](input.format)(exponentF64Subnormal-52+1)(exponentF32Subnormal-23+1) + val ofThreshold = muxDouble[UInt](input.format)(exponentF64Infinity-1)(exponentF32Infinity-1) + + when(!math.special && math.exponent <= ufSubnormalThreshold && roundAdjusted.asUInt =/= 0){ //Do not catch exact 1.17549435E-38 underflow, but, who really cares ? 
uf := True } - when(!math.special && math.exponent >= exponentOne + 128){ + when(!math.special && math.exponent > ofThreshold){ nx := True of := True val doMax = input.roundMode.mux( @@ -1170,7 +1208,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ FpuRoundMode.RMM -> (False) ) when(doMax){ - patched.exponent := exponentOne + 127 + patched.exponent := ofThreshold patched.mantissa.setAll() } otherwise { patched.setInfinity @@ -1178,7 +1216,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ } - when(!math.special && math.exponent <= exponentOne - 127-23){ + when(!math.special && math.exponent < ufThreshold){ nx := True uf := True val doMin = input.roundMode.mux( @@ -1189,7 +1227,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ FpuRoundMode.RMM -> (False) ) when(doMin){ - patched.exponent := exponentOne - 127-23+1 + patched.exponent := ufThreshold.resized patched.mantissa := 0 } otherwise { patched.setZero diff --git a/src/test/scala/vexriscv/ip/fpu/FpuTest.scala b/src/test/scala/vexriscv/ip/fpu/FpuTest.scala index 6d5b495..79ea6b8 100644 --- a/src/test/scala/vexriscv/ip/fpu/FpuTest.scala +++ b/src/test/scala/vexriscv/ip/fpu/FpuTest.scala @@ -23,7 +23,17 @@ import scala.util.Random class FpuTest extends FunSuite{ val b2f = lang.Float.intBitsToFloat(_) + val b2d = lang.Double.longBitsToDouble(_) val f2b = lang.Float.floatToRawIntBits(_) + val d2bOffset = BigInt("10000000000000000",16) + def d2b(that : Double) = { + val l = lang.Double.doubleToRawLongBits(that) + var a = BigInt(l) + if(l < 0) { + a = d2bOffset + a + } + a + } test("f32f64") { @@ -64,24 +74,6 @@ class FpuTest extends FunSuite{ def f32_f32_f32 ={ val s = new Scanner(next) val a,b,c = (s.nextLong(16).toInt) -// if(b2f(a).isNaN || b2f(b).isNaN){ -// print("NAN => ") -// if(((a >> 23) & 0xFF) == 0xFF && ((a >> 0) & 0xEFFFFF) != 0){ -// print(a.toHexString) -// print(" " + f2b(b2f(a)).toHexString) -// } -// if(((b >> 23) & 
0xFF) == 0xFF && ((b >> 0) & 0xEFFFFF) != 0){ -// print(b.toHexString) -// print(" " + f2b(b2f(b)).toHexString) -// } -// if(((c >> 23) & 0xFF) == 0xFF && ((c >> 0) & 0xEFFFFF) != 0){ -// print(" " + c.toHexString) -// print(" " + f2b(b2f(c)).toHexString) -// } -// -// print(" " + simTime()) -// println("") -// } (b2f(a), b2f(b), b2f(c), s.nextInt(16)) } @@ -105,8 +97,39 @@ class FpuTest extends FunSuite{ val s = new Scanner(next) val a,b = (s.nextLong(16).toInt) (b2f(a), b2f(b), s.nextInt(16)) - } + } + + def nextLong(s : Scanner) : Long = java.lang.Long.parseUnsignedLong( s.next(),16) + + def f64_f64_f64 ={ + val s = new Scanner(next) + val a,b,c = nextLong(s) + (b2d(a), b2d(b), b2d(c), s.nextInt(16)) + } + + def i32_f64 ={ + val s = new Scanner(next) + (s.nextLong(16).toInt, b2d(nextLong(s)), s.nextInt(16)) + } + + def f64_i32 = { + val s = new Scanner(next) + (b2d(nextLong(s)), s.nextLong(16).toInt, s.nextInt(16)) + } + + def f64_f64_i32 = { + val str = next + val s = new Scanner(str) + val a,b,c = (nextLong(s)) + (b2d(a), b2d(b), c, s.nextInt(16)) + } + + def f64_f64 = { + val s = new Scanner(next) + val a,b = (s.nextLong(16)) + (b2d(a), b2d(b), s.nextInt(16)) + } } lazy val RAW = build("") lazy val RNE = build("-rnear_even") @@ -125,28 +148,33 @@ class FpuTest extends FunSuite{ } } - val f32 = new { - val add = new TestCase("f32_add") - val sub = new TestCase("f32_sub") - val mul = new TestCase("f32_mul") - val ui2f = new TestCase("ui32_to_f32") - val i2f = new TestCase("i32_to_f32") - val f2ui = new TestCase("f32_to_ui32 -exact") - val f2i = new TestCase("f32_to_i32 -exact") - val eq = new TestCase("f32_eq") - val lt = new TestCase("f32_lt") - val le = new TestCase("f32_le") - val min = new TestCase("f32_le") - val max = new TestCase("f32_lt") - val transfer = new TestCase("f32_eq") - val fclass = new TestCase("f32_eq") - val sgnj = new TestCase("f32_eq") - val sgnjn = new TestCase("f32_eq") - val sgnjx = new TestCase("f32_eq") - val sqrt = new 
TestCase("f32_sqrt") - val div = new TestCase("f32_div") + class TestVector(f : String) { + val add = new TestCase(s"${f}_add") + val sub = new TestCase(s"${f}_sub") + val mul = new TestCase(s"${f}_mul") + val ui2f = new TestCase(s"ui32_to_${f}") + val i2f = new TestCase(s"i32_to_${f}") + val f2ui = new TestCase(s"${f}_to_ui32 -exact") + val f2i = new TestCase(s"${f}_to_i32 -exact") + val eq = new TestCase(s"${f}_eq") + val lt = new TestCase(s"${f}_lt") + val le = new TestCase(s"${f}_le") + val min = new TestCase(s"${f}_le") + val max = new TestCase(s"${f}_lt") + val transfer = new TestCase(s"${f}_eq") + val fclass = new TestCase(s"${f}_eq") + val sgnj = new TestCase(s"${f}_eq") + val sgnjn = new TestCase(s"${f}_eq") + val sgnjx = new TestCase(s"${f}_eq") + val sqrt = new TestCase(s"${f}_sqrt") + val div = new TestCase(s"${f}_div") + val f32 = new TestCase(s"${f}_eq") + val f64 = new TestCase(s"${f}_eq") } + val f32 = new TestVector("f32") + val f64 = new TestVector("f64") + val cpus = for(id <- 0 until portCount) yield new { val cmdQueue = mutable.Queue[FpuCmd => Unit]() val commitQueue = mutable.Queue[FpuCommit => Unit]() @@ -165,9 +193,15 @@ class FpuTest extends FunSuite{ val patch = if(value.abs == 1.17549435E-38f) ref & ~2 else ref flagMatch(patch, report) } + + def flagMatch(ref : Int, value : Double, report : String): Unit ={ + val patch = if(value.abs == b2d(1 << 52)) ref & ~2 else ref + flagMatch(patch, report) + } + def flagMatch(ref : Int, report : String): Unit ={ waitUntil(pendingMiaou == 0) - assert(flagAccumulator == ref, s"Flag missmatch dut=$flagAccumulator ref=$ref $report") + softAssert(flagAccumulator == ref, s"Flag missmatch dut=$flagAccumulator ref=$ref $report") flagAccumulator = 0 } def flagClear(): Unit ={ @@ -231,6 +265,10 @@ class FpuTest extends FunSuite{ loadRaw(rd, f2b(value).toLong & 0xFFFFFFFFl, FpuFormat.FLOAT) } + def load(rd : Int, value : Double): Unit ={ + loadRaw(rd, d2b(value), FpuFormat.DOUBLE) + } + def storeRaw(rs : Int, 
format : FpuFormat.E)(body : FpuRsp => Unit): Unit ={ cmdAdd {cmd => cmd.opcode #= cmd.opcode.spinalEnum.STORE @@ -250,8 +288,11 @@ class FpuTest extends FunSuite{ def storeFloat(rs : Int)(body : Float => Unit): Unit ={ storeRaw(rs, FpuFormat.FLOAT){rsp => body(b2f(rsp.value.toBigInt.toInt))} } + def store(rs : Int)(body : Double => Unit): Unit ={ + storeRaw(rs, FpuFormat.DOUBLE){rsp => body(b2d(rsp.value.toBigInt.toLong))} + } - def fpuF2f(rd : Int, rs1 : Int, rs2 : Int, rs3 : Int, opcode : FpuOpcode.E, arg : Int, rounding : FpuRoundMode.E = FpuRoundMode.RNE): Unit ={ + def fpuF2f(rd : Int, rs1 : Int, rs2 : Int, rs3 : Int, opcode : FpuOpcode.E, arg : Int, rounding : FpuRoundMode.E, format : FpuFormat.E): Unit ={ cmdAdd {cmd => cmd.opcode #= opcode cmd.rs1 #= rs1 @@ -260,6 +301,7 @@ class FpuTest extends FunSuite{ cmd.rd #= rd cmd.arg #= arg cmd.roundMode #= rounding + cmd.format #= format } commitQueue += {cmd => cmd.write #= true @@ -267,7 +309,7 @@ class FpuTest extends FunSuite{ } } - def fpuF2i(rs1 : Int, rs2 : Int, opcode : FpuOpcode.E, arg : Int, rounding : FpuRoundMode.E = FpuRoundMode.RNE)(body : FpuRsp => Unit): Unit ={ + def fpuF2i(rs1 : Int, rs2 : Int, opcode : FpuOpcode.E, arg : Int, rounding : FpuRoundMode.E, format : FpuFormat.E)(body : FpuRsp => Unit): Unit ={ cmdAdd {cmd => cmd.opcode #= opcode cmd.rs1 #= rs1 @@ -276,58 +318,59 @@ class FpuTest extends FunSuite{ cmd.rd.randomize() cmd.arg #= arg cmd.roundMode #= rounding + cmd.format #= format } rspQueue += body } - def mul(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E = FpuRoundMode.RNE): Unit ={ - fpuF2f(rd, rs1, rs2, Random.nextInt(32), FpuOpcode.MUL, 0, rounding) + def mul(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E, format : FpuFormat.E): Unit ={ + fpuF2f(rd, rs1, rs2, Random.nextInt(32), FpuOpcode.MUL, 0, rounding, format) } - def add(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E = FpuRoundMode.RNE): Unit ={ - fpuF2f(rd, rs1, rs2, Random.nextInt(32), 
FpuOpcode.ADD, 0, rounding) + def add(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E = FpuRoundMode.RNE, format : FpuFormat.E): Unit ={ + fpuF2f(rd, rs1, rs2, Random.nextInt(32), FpuOpcode.ADD, 0, rounding, format) } - def sub(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E = FpuRoundMode.RNE): Unit ={ - fpuF2f(rd, rs1, rs2, Random.nextInt(32), FpuOpcode.ADD, 1, rounding) + def sub(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E = FpuRoundMode.RNE, format : FpuFormat.E): Unit ={ + fpuF2f(rd, rs1, rs2, Random.nextInt(32), FpuOpcode.ADD, 1, rounding, format) } - def div(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E = FpuRoundMode.RNE): Unit ={ - fpuF2f(rd, rs1, rs2, Random.nextInt(32), FpuOpcode.DIV, Random.nextInt(4), rounding) + def div(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E = FpuRoundMode.RNE, format : FpuFormat.E): Unit ={ + fpuF2f(rd, rs1, rs2, Random.nextInt(32), FpuOpcode.DIV, Random.nextInt(4), rounding, format) } - def sqrt(rd : Int, rs1 : Int, rounding : FpuRoundMode.E = FpuRoundMode.RNE): Unit ={ - fpuF2f(rd, rs1, Random.nextInt(32), Random.nextInt(32), FpuOpcode.SQRT, Random.nextInt(4), rounding) + def sqrt(rd : Int, rs1 : Int, rounding : FpuRoundMode.E = FpuRoundMode.RNE, format : FpuFormat.E): Unit ={ + fpuF2f(rd, rs1, Random.nextInt(32), Random.nextInt(32), FpuOpcode.SQRT, Random.nextInt(4), rounding, format) } - def fma(rd : Int, rs1 : Int, rs2 : Int, rs3 : Int, rounding : FpuRoundMode.E = FpuRoundMode.RNE): Unit ={ - fpuF2f(rd, rs1, rs2, rs3, FpuOpcode.FMA, 0, rounding) + def fma(rd : Int, rs1 : Int, rs2 : Int, rs3 : Int, rounding : FpuRoundMode.E, format : FpuFormat.E): Unit ={ + fpuF2f(rd, rs1, rs2, rs3, FpuOpcode.FMA, 0, rounding, format) } - def sgnjRaw(rd : Int, rs1 : Int, rs2 : Int, arg : Int): Unit ={ - fpuF2f(rd, rs1, rs2, Random.nextInt(32), FpuOpcode.SGNJ, arg, FpuRoundMode.elements.randomPick()) + def sgnjRaw(rd : Int, rs1 : Int, rs2 : Int, arg : Int, format : FpuFormat.E): Unit 
={ + fpuF2f(rd, rs1, rs2, Random.nextInt(32), FpuOpcode.SGNJ, arg, FpuRoundMode.elements.randomPick(), format) } - def sgnj(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E = null): Unit ={ - sgnjRaw(rd, rs1, rs2, 0) + def sgnj(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E = null, format : FpuFormat.E): Unit ={ + sgnjRaw(rd, rs1, rs2, 0, format) } - def sgnjn(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E = null): Unit ={ - sgnjRaw(rd, rs1, rs2, 1) + def sgnjn(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E = null, format : FpuFormat.E): Unit ={ + sgnjRaw(rd, rs1, rs2, 1, format) } - def sgnjx(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E = null): Unit ={ - sgnjRaw(rd, rs1, rs2, 2) + def sgnjx(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E = null, format : FpuFormat.E): Unit ={ + sgnjRaw(rd, rs1, rs2, 2, format) } - def cmp(rs1 : Int, rs2 : Int, arg : Int = 1)(body : FpuRsp => Unit): Unit ={ - fpuF2i(rs1, rs2, FpuOpcode.CMP, arg, FpuRoundMode.elements.randomPick())(body) + def cmp(rs1 : Int, rs2 : Int, arg : Int, format : FpuFormat.E)(body : FpuRsp => Unit): Unit ={ + fpuF2i(rs1, rs2, FpuOpcode.CMP, arg, FpuRoundMode.elements.randomPick(), format)(body) } - def f2i(rs1 : Int, signed : Boolean, rounding : FpuRoundMode.E = FpuRoundMode.RNE)(body : FpuRsp => Unit): Unit ={ - fpuF2i(rs1, Random.nextInt(32), FpuOpcode.F2I, if(signed) 1 else 0, rounding)(body) + def f2i(rs1 : Int, signed : Boolean, rounding : FpuRoundMode.E, format : FpuFormat.E)(body : FpuRsp => Unit): Unit ={ + fpuF2i(rs1, Random.nextInt(32), FpuOpcode.F2I, if(signed) 1 else 0, rounding, format)(body) } - def i2f(rd : Int, value : Int, signed : Boolean, rounding : FpuRoundMode.E): Unit ={ + def i2f(rd : Int, value : Int, signed : Boolean, rounding : FpuRoundMode.E, format : FpuFormat.E): Unit ={ cmdAdd {cmd => cmd.opcode #= cmd.opcode.spinalEnum.I2F cmd.rs1.randomize() @@ -336,6 +379,7 @@ class FpuTest extends FunSuite{ cmd.rd #= rd 
cmd.arg #= (if(signed) 1 else 0) cmd.roundMode #= rounding + cmd.format #= format } commitQueue += {cmd => cmd.write #= true @@ -451,13 +495,13 @@ class FpuTest extends FunSuite{ } - def testBinaryOp(op : (Int,Int,Int,FpuRoundMode.E) => Unit, a : Float, b : Float, ref : Float, flag : Int, rounding : FpuRoundMode.E, opName : String): Unit ={ + def testBinaryOp(op : (Int,Int,Int,FpuRoundMode.E, FpuFormat.E) => Unit, a : Float, b : Float, ref : Float, flag : Int, rounding : FpuRoundMode.E, opName : String): Unit ={ val rs = new RegAllocator() val rs1, rs2, rs3 = rs.allocate() val rd = Random.nextInt(32) load(rs1, a) load(rs2, b) - op(rd,rs1,rs2, rounding) + op(rd,rs1,rs2, rounding, FpuFormat.FLOAT) storeFloat(rd){v => assert(f2b(v) == f2b(ref), f"## ${a} ${opName} $b = $v, $ref $rounding") } @@ -466,12 +510,25 @@ class FpuTest extends FunSuite{ } + def testBinaryOpF64(op : (Int,Int,Int,FpuRoundMode.E, FpuFormat.E) => Unit, a : Double, b : Double, ref : Double, flag : Int, rounding : FpuRoundMode.E, opName : String): Unit ={ + val rs = new RegAllocator() + val rs1, rs2, rs3 = rs.allocate() + val rd = Random.nextInt(32) + load(rs1, a) + load(rs2, b) + op(rd,rs1,rs2, rounding, FpuFormat.DOUBLE) + store(rd){v => + assert(d2b(v) == d2b(ref), f"## ${a} ${opName} $b = $v, $ref $rounding") + } - def testTransferRaw(a : Float, iSrc : Boolean, iDst : Boolean): Unit ={ + flagMatch(flag, ref, f"## ${opName} ${a} $b $ref $rounding") + } + + + def testTransferF32Raw(a : Float, iSrc : Boolean, iDst : Boolean): Unit ={ val rd = Random.nextInt(32) def handle(v : Float): Unit ={ - val refUnclamped = a val ref = a assert(f2b(v) == f2b(ref), f"$a = $v, $ref") } @@ -482,6 +539,49 @@ class FpuTest extends FunSuite{ flagMatch(0, f"$a") } + + def testTransferF64Raw(a : Double): Unit ={ + val rd = Random.nextInt(32) + + def handle(v : Double): Unit ={ + val ref = a + assert(d2b(v) == d2b(ref), f"$a = $v, $ref") + } + + load(rd, a) + store(rd)(handle) + + flagMatch(0, f"$a") + } + + def 
testTransferF32F64Raw(a : Float, iSrc : Boolean): Unit ={ + val rd = Random.nextInt(32) + if(iSrc) fmv_w_x(rd, f2b(a)) else load(rd, a) + storeRaw(rd, FpuFormat.DOUBLE){rsp => + val v = rsp.value.toBigInt.toLong + val ref = (0xFFFFFFFFl << 32) | f2b(a) + assert(v == ref, f"$a = $v, $ref") + } + flagMatch(0, f"$a") + } + + def testTransferF64F32Raw(a : Double, iDst : Boolean): Unit ={ + val rd = Random.nextInt(32) + load(rd, a) + if(iDst)fmv_x_w(rd){v_ => + val v = f2b(v_).toLong & 0xFFFFFFFFl + val ref = d2b(a) & 0xFFFFFFFFl + assert(v == ref, f"$a = $v, $ref") + } + else storeRaw(rd, FpuFormat.FLOAT){rsp => + val v = rsp.value.toBigInt.toLong & 0xFFFFFFFFl + val ref = d2b(a) & 0xFFFFFFFFl + assert(v == ref, f"$a = $v, $ref") + } + flagMatch(0, f"$a") + } + + def testClassRaw(a : Float) : Unit = { val rd = Random.nextInt(32) @@ -513,7 +613,7 @@ class FpuTest extends FunSuite{ load(rs2, b) load(rs3, c) - fma(rd,rs1,rs2,rs3) + fma(rd,rs1,rs2,rs3, FpuRoundMode.RNE, FpuFormat.FLOAT) storeFloat(rd){v => val ref = a.toDouble * b.toDouble + c.toDouble println(f"$a%.20f * $b%.20f + $c%.20f = $v%.20f, $ref%.20f") @@ -530,7 +630,7 @@ class FpuTest extends FunSuite{ load(rs1, a) load(rs2, b) - div(rd,rs1,rs2) + div(rd,rs1,rs2, FpuRoundMode.RNE, FpuFormat.FLOAT) storeFloat(rd){v => val refUnclamped = a/b val refClamped = ((a)/(b)) @@ -547,7 +647,7 @@ class FpuTest extends FunSuite{ val rd = Random.nextInt(32) load(rs1, a) - sqrt(rd,rs1) + sqrt(rd,rs1, FpuRoundMode.RNE, FpuFormat.FLOAT) storeFloat(rd){v => val ref = Math.sqrt(a).toFloat val error = Math.abs(ref-v)/ref @@ -564,7 +664,7 @@ class FpuTest extends FunSuite{ val rd = Random.nextInt(32) load(rs1, a) - sqrt(rd,rs1) + sqrt(rd,rs1, FpuRoundMode.RNE, FpuFormat.FLOAT) storeFloat(rd){v => val error = Math.abs(ref-v)/ref println(f"sqrt($a) = $v, $ref $error $rounding") @@ -579,7 +679,7 @@ class FpuTest extends FunSuite{ load(rs1, a) load(rs2, b) - div(rd,rs1, rs2) + div(rd,rs1, rs2, FpuRoundMode.RNE, FpuFormat.FLOAT) 
storeFloat(rd){v => val error = Math.abs(ref-v)/ref println(f"div($a, $b) = $v, $ref $error $rounding") @@ -594,16 +694,16 @@ class FpuTest extends FunSuite{ val rs1 = rs.allocate() val rd = Random.nextInt(32) load(rs1, a) - f2i(rs1, signed, rounding){rsp => + f2i(rs1, signed, rounding, FpuFormat.FLOAT){rsp => if(signed) { - val v = rsp.value.toLong.toInt + val v = rsp.value.toBigInt.toInt var ref2 = ref if(a >= Int.MaxValue) ref2 = Int.MaxValue if(a <= Int.MinValue) ref2 = Int.MinValue if(a.isNaN) ref2 = Int.MaxValue assert(v == (ref2), f" <= f2i($a) = $v, $ref2, $rounding, $flag") } else { - val v = rsp.value.toLong + val v = rsp.value.toBigInt.toLong & 0xFFFFFFFFl var ref2 = ref.toLong & 0xFFFFFFFFl if(a < 0) ref2 = 0 if(a >= 0xFFFFFFFFl) ref2 = 0xFFFFFFFFl @@ -621,15 +721,15 @@ class FpuTest extends FunSuite{ def testI2fExact(a : Int, b : Float, f : Int, signed : Boolean, rounding : FpuRoundMode.E): Unit ={ val rs = new RegAllocator() val rd = Random.nextInt(32) - i2f(rd, a, signed, rounding) + i2f(rd, a, signed, rounding, FpuFormat.FLOAT) storeFloat(rd){v => val aLong = if(signed) a.toLong else a.toLong & 0xFFFFFFFFl val ref = b - assert(f2b(v) == f2b(ref), f"i2f($aLong) = $v, $ref") + assert(f2b(v) == f2b(ref), f"i2f($aLong) = $v, $ref $rounding") } - flagMatch(f, b, f"i2f() = $b") + flagMatch(f, b, f"i2f($a) = $b") } @@ -640,7 +740,7 @@ class FpuTest extends FunSuite{ val rd = Random.nextInt(32) load(rs1, a) load(rs2, b) - cmp(rs1, rs2, arg){rsp => + cmp(rs1, rs2, arg, FpuFormat.FLOAT){rsp => val v = rsp.value.toLong assert(v === ref, f"cmp($a, $b, $arg) = $v, $ref") } @@ -744,29 +844,6 @@ class FpuTest extends FunSuite{ } } - - -// for(i <- 0 until 64){ -// val rounding = FpuRoundMode.RMM -// val a = 24f -// val b = b2f(0x3f800000+i) -// val c = Clib.math.mulF32(a, b, rounding.position) -// val f = 0 -// testMulExact(a,b,c,f, rounding) -// } - - val binaryOps = List[(Int,Int,Int,FpuRoundMode.E) => Unit](add, sub, mul) - -// testSqrt(0.0f) - // 
testSqrt(1.2f) - // for(a <- fAll) testSqrt(a) -// for(_ <- 0 until 1000) testSqrt(randomFloat()) - - - - - - def testFma() : Unit = { testFmaRaw(randomFloat(), randomFloat(), randomFloat()) flagClear() @@ -786,13 +863,13 @@ class FpuTest extends FunSuite{ testEqRaw(a,b,i, f) } - def testF2ui() : Unit = { + def testF2uiF32() : Unit = { val rounding = FpuRoundMode.elements.randomPick() val (a,b,f) = f32.f2ui(rounding).f32_i32 testF2iExact(a,b, f, false, rounding) } - def testF2i() : Unit = { + def testF2iF32() : Unit = { val rounding = FpuRoundMode.elements.randomPick() val (a,b,f) = f32.f2i(rounding).f32_i32 testF2iExact(a,b, f, true, rounding) @@ -823,11 +900,26 @@ class FpuTest extends FunSuite{ testSgnjxRaw(a, b) } - def testTransfer() : Unit = { + def testTransferF32() : Unit = { val (a,b,r,f) = f32.transfer.RAW.f32_f32_i32 - testTransferRaw(a, Random.nextBoolean(), Random.nextBoolean()) + testTransferF32Raw(a, Random.nextBoolean(), Random.nextBoolean()) } + def testTransferF64() : Unit = { + val (a,b,r,f) = f64.transfer.RAW.f64_f64_i32 + testTransferF64Raw(a) + } + + def testTransferF64F32() : Unit = { + val (a,b,r,f) = f64.f32.RAW.f64_f64_i32 + testTransferF64F32Raw(a, Random.nextBoolean()) + } + def testTransferF32F64() : Unit = { + val (a,b,r,f) = f32.f64.RAW.f32_f32_i32 + testTransferF32F64Raw(a, Random.nextBoolean()) + } + + def testClass() : Unit = { val (a,b,r,f) = f32.fclass.RAW.f32_f32_i32 testClassRaw(a) @@ -854,59 +946,112 @@ class FpuTest extends FunSuite{ testI2fExact(a,b,f, false, rounding) } - def testMul() : Unit = { + def testMulF32() : Unit = { val rounding = FpuRoundMode.elements.randomPick() val (a,b,c,f) = f32.mul(rounding).f32_f32_f32 testBinaryOp(mul,a,b,c,f, rounding,"mul") } - def testAdd() : Unit = { + def testAddF32() : Unit = { val rounding = FpuRoundMode.elements.randomPick() val (a,b,c,f) = f32.add(rounding).f32_f32_f32 testBinaryOp(add,a,b,c,f, rounding,"add") } - def testSub() : Unit = { + def testSubF32() : Unit = { val 
rounding = FpuRoundMode.elements.randomPick() val (a,b,c,f) = f32.sub(rounding).f32_f32_f32 testBinaryOp(sub,a,b,c,f, rounding,"sub") } + def testMulF64() : Unit = { + val rounding = FpuRoundMode.elements.randomPick() + val (a,b,c,f) = f64.mul(rounding).f64_f64_f64 + testBinaryOpF64(mul,a,b,c,f, rounding,"mul") + } - val f32Tests = List[() => Unit](testSub, testAdd, testMul, testI2f, testUI2f, testMin, testMax, testSgnj, testTransfer, testDiv, testSqrt, testF2i, testF2ui, testLe, testEq, testLt, testClass, testFma) + + val f32Tests = List[() => Unit](testSubF32, testAddF32, testMulF32, testI2f, testUI2f, testMin, testMax, testSgnj, testTransferF32, testDiv, testSqrt, testF2iF32, testF2uiF32, testLe, testEq, testLt, testClass, testFma) + + //TODO test boxing + if(p.withDouble) { +// for(_ <- 0 until 10000) testUI2f64() +// for(_ <- 0 until 10000) testI2f64() +// println("f64 i2f done") +// +// for(_ <- 0 until 10000) testF2uiF64() +// for(_ <- 0 until 10000) testF2iF64() +// println("f64 f2i done") + +// testF2iExact(1.0f,1, 0, false, FpuRoundMode.RTZ) +// testF2iExact(2.0f,2, 0, false, FpuRoundMode.RTZ) +// testF2iExact(2.5f,2, 1, false, FpuRoundMode.RTZ) + testBinaryOpF64(mul,1.0, 1.0, 1.0,0 , FpuRoundMode.RNE,"mul") + testBinaryOpF64(mul,1.0, 2.0, 2.0,0 , FpuRoundMode.RNE,"mul") + testBinaryOpF64(mul,2.5, 2.0, 5.0,0 , FpuRoundMode.RNE,"mul") - testTransferRaw(1.0f, false, false) - testTransferRaw(2.0f, false, false) - testTransferRaw(2.5f, false, false) - testTransferRaw(6.97949770801e-39f, false, false) - testTransferRaw(8.72437213501e-40f, false, false) - testTransferRaw(5.6E-45f, false, false) + for(_ <- 0 until 10000) testMulF64() + println("f64 Mul done") + testTransferF64Raw(1.0) + testTransferF64Raw(2.0) + testTransferF64Raw(2.5) + testTransferF64Raw(6.97949770801e-39) + testTransferF64Raw(8.72437213501e-40) + testTransferF64Raw(5.6E-45) + testTransferF32F64Raw(b2f(0xFFFF1234), false) + testTransferF64F32Raw(b2d(0xFFF123498765463l << 4), false) + 
testTransferF32F64Raw(b2f(0xFFFF1234), true) + testTransferF64F32Raw(b2d(0xFFF123498765463l << 4), true) + for (_ <- 0 until 10000) testTransferF64() + println("f64 load/store/rf transfer done") + for (_ <- 0 until 10000) testTransferF64F32() + println("f64 -> f32 load/store/rf transfer done") - for(_ <- 0 until 10000) testTransfer() + for (_ <- 0 until 10000) testTransferF32F64() + println("f32 -> f64 load/store/rf transfer done") + + } + + for(_ <- 0 until 10000) testTransferF32() println("f32 load/store/rf transfer done") - for(_ <- 0 until 10000) testF2ui() - for(_ <- 0 until 10000) testF2i() - println("f2i done") + for(_ <- 0 until 10000) testMulF32() + println("Mul done") + for(_ <- 0 until 10000) testUI2f() for(_ <- 0 until 10000) testI2f() println("i2f done") + testF2iExact(1.0f,1, 0, false, FpuRoundMode.RTZ) + testF2iExact(2.0f,2, 0, false, FpuRoundMode.RTZ) + testF2iExact(2.5f,2, 1, false, FpuRoundMode.RTZ) + + + + + + for(_ <- 0 until 10000) testF2uiF32() + for(_ <- 0 until 10000) testF2iF32() + println("f2i done") + + + // waitUntil(cmdQueue.isEmpty) // dut.clockDomain.waitSampling(1000) // simSuccess() + for(i <- 0 until 1000) testFma() flagClear() println("fma done") //TODO @@ -959,14 +1104,11 @@ class FpuTest extends FunSuite{ - for(_ <- 0 until 10000) testMul() - - println("Mul done") - for(_ <- 0 until 10000) testAdd() - for(_ <- 0 until 10000) testSub() + for(_ <- 0 until 10000) testAddF32() + for(_ <- 0 until 10000) testSubF32() println("Add done")