diff --git a/src/main/scala/vexriscv/ip/fpu/FpuCore.scala b/src/main/scala/vexriscv/ip/fpu/FpuCore.scala index f5350b0..c674bfe 100644 --- a/src/main/scala/vexriscv/ip/fpu/FpuCore.scala +++ b/src/main/scala/vexriscv/ip/fpu/FpuCore.scala @@ -56,6 +56,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ val arg = p.Arg() val roundMode = FpuRoundMode() val format = p.withDouble generate FpuFormat() + val rs1Boxed, rs2Boxed = p.withDouble generate Bool() } @@ -79,6 +80,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ val arg = Bits(2 bits) val roundMode = FpuRoundMode() val format = p.withDouble generate FpuFormat() + val rs1Boxed, rs2Boxed = p.withDouble generate Bool() } case class MulInput() extends Bundle{ @@ -198,7 +200,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ //TODO nan boxing decoding val read = new Area{ - val arbiter = StreamArbiterFactory.noLock.lowerFirst.build(FpuCmd(p), portCount) + val arbiter = StreamArbiterFactory.noLock.roundRobin.build(FpuCmd(p), portCount) arbiter.io.inputs <> Vec(io.port.map(_.cmd)) val s0 = Stream(RfReadInput()) @@ -208,7 +210,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ val useRs1, useRs2, useRs3, useRd = False switch(s0.opcode){ - is(p.Opcode.LOAD) { useRd := True } + is(p.Opcode.LOAD) { useRd := True } is(p.Opcode.STORE) { useRs1 := True } is(p.Opcode.ADD) { useRd := True; useRs1 := True; useRs2 := True } is(p.Opcode.MUL) { useRd := True; useRs1 := True; useRs2 := True } @@ -261,20 +263,25 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ output.rs2 := rs2Entry.value output.rs3 := rs3Entry.value if(p.withDouble){ + output.rs1Boxed := rs1Entry.boxed + output.rs2Boxed := rs2Entry.boxed output.format := s1.format val store = s1.opcode === FpuOpcode.STORE ||s1.opcode === FpuOpcode.FMV_X_W - when(store){ //Pass through - output.format := rs1Entry.boxed ? FpuFormat.FLOAT | FpuFormat.DOUBLE - } elsewhen(s1.format === FpuFormat.FLOAT =/= rs1Entry.boxed){ - output.rs1.setNanQuiet - output.rs1.sign := False - } - when(s1.format === FpuFormat.FLOAT =/= rs2Entry.boxed){ - output.rs2.setNanQuiet - output.rs2.sign := False - } - when(s1.format === FpuFormat.FLOAT =/= rs3Entry.boxed){ - output.rs3.setNanQuiet + val sgnjBypass = s1.opcode === FpuOpcode.SGNJ && s1.format === FpuFormat.DOUBLE + when(!sgnjBypass) { + when(store) { //Pass through + output.format := rs1Entry.boxed ? FpuFormat.FLOAT | FpuFormat.DOUBLE + } elsewhen (s1.format === FpuFormat.FLOAT =/= rs1Entry.boxed) { + output.rs1.setNanQuiet + output.rs1.sign := False + } + when(s1.format === FpuFormat.FLOAT =/= rs2Entry.boxed) { + output.rs2.setNanQuiet + output.rs2.sign := False + } + when(s1.format === FpuFormat.FLOAT =/= rs3Entry.boxed) { + output.rs3.setNanQuiet + } } } } @@ -686,8 +693,11 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ ) val result = (Mux(resign, ~unsigned, unsigned) + (resign ^ increment).asUInt) val overflow = (input.rs1.exponent > (input.arg(0) ? U(exponentOne+30) | U(exponentOne+31)) || input.rs1.isInfinity) && !input.rs1.sign || input.rs1.isNan - val underflow = (input.rs1.exponent > U(exponentOne+31) || input.arg(0) && unsigned.msb && unsigned(30 downto 0) =/= 0 || !input.arg(0) && (unsigned =/= 0 || increment) || input.rs1.isInfinity) && input.rs1.sign + val underflow = (input.rs1.exponent > U(exponentOne+31) || input.arg(0) && unsigned.msb && (unsigned(30 downto 0) =/= 0 || increment) || !input.arg(0) && (unsigned =/= 0 || increment) || input.rs1.isInfinity) && input.rs1.sign val isZero = input.rs1.isZero + if(p.withDouble){ + overflow setWhen(!input.rs1.sign && increment && unsigned(30 downto 0).andR && (input.arg(0) || unsigned(31))) + } when(isZero){ result := 0 } elsewhen(underflow || overflow) { @@ -720,7 +730,13 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ val minMaxSelectNanQuiet = input.rs1.isNan && input.rs2.isNan val cmpResult = B(rs1Smaller && !bothZero && !input.arg(1) || (rs1Equal || bothZero) && !input.arg(0)) when(input.rs1.isNan || input.rs2.isNan) { cmpResult := 0 } - val sgnjResult = (input.rs1.sign && input.arg(1)) ^ input.rs2.sign ^ input.arg(0) + val sgnjRs1Sign = CombInit(input.rs1.sign) + val sgnjRs2Sign = CombInit(input.rs2.sign) + if(p.withDouble){ + sgnjRs1Sign setWhen(input.rs1Boxed && input.format === FpuFormat.DOUBLE) + sgnjRs2Sign setWhen(input.rs2Boxed && input.format === FpuFormat.DOUBLE) + } + val sgnjResult = (sgnjRs1Sign && input.arg(1)) ^ sgnjRs2Sign ^ input.arg(0) val fclassResult = B(0, 32 bits) val decoded = input.rs1.decode() fclassResult(0) := input.rs1.sign && decoded.isInfinity @@ -771,6 +787,22 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ } is(FpuOpcode.SGNJ){ rfOutput.value.sign := sgnjResult + if(p.withDouble) when(input.format === FpuFormat.DOUBLE){ + when(input.rs1Boxed){ + rfOutput.value.sign := input.rs1.sign + rfOutput.format := FpuFormat.FLOAT + } +// //kill boxing => F32 -> F64 NAN +// when(input.rs1Boxed && !sgnjResult){ +// rfOutput.value.setNan +// rfOutput.value.mantissa.setAll() +// rfOutput.value.mantissa(31 downto 0) := input.rs1.sign ## input.rs1.exponent +// } +// //Spawn boxing => F64 NAN -> F32 +// when(!input.rs1Boxed && input.rs1.exponent === exponentOne + 1024 && input.rs1.mantissa(32, 52-32 bits).andR && sgnjResult){ +// +// } + } } if(p.withDouble) is(FpuOpcode.FCVT_X_X){ rfOutput.format := ((input.format === FpuFormat.FLOAT) ? FpuFormat.DOUBLE | FpuFormat.FLOAT) diff --git a/src/main/scala/vexriscv/plugin/FpuPlugin.scala b/src/main/scala/vexriscv/plugin/FpuPlugin.scala index 468f14b..855c397 100644 --- a/src/main/scala/vexriscv/plugin/FpuPlugin.scala +++ b/src/main/scala/vexriscv/plugin/FpuPlugin.scala @@ -17,6 +17,7 @@ class FpuPlugin(externalFpu : Boolean = false, object FPU_FORKED extends Stageable(Bool()) object FPU_OPCODE extends Stageable(FpuOpcode()) object FPU_ARG extends Stageable(Bits(2 bits)) + object FPU_FORMAT extends Stageable(FpuFormat()) var port : FpuPort = null @@ -49,6 +50,7 @@ class FpuPlugin(externalFpu : Boolean = false, val fminMax = floatRfWrite :+ FPU_OPCODE -> FpuOpcode.MIN_MAX val fmvWx = floatRfWrite :+ FPU_OPCODE -> FpuOpcode.FMV_W_X :+ RS1_USE -> True val fcvtI2f = floatRfWrite :+ FPU_OPCODE -> FpuOpcode.I2F :+ RS1_USE -> True + val fcvtxx = floatRfWrite :+ FPU_OPCODE -> FpuOpcode.FCVT_X_X val fcmp = intRfWrite :+ FPU_OPCODE -> FpuOpcode.CMP val fclass = intRfWrite :+ FPU_OPCODE -> FpuOpcode.FCLASS @@ -73,35 +75,69 @@ class FpuPlugin(externalFpu : Boolean = false, def arg(v : Int) = FPU_ARG -> U(v, 2 bits) val decoderService = pipeline.service(classOf[DecoderService]) decoderService.addDefault(FPU_ENABLE, False) + + val f32 = FPU_FORMAT -> FpuFormat.FLOAT + val f64 = FPU_FORMAT -> FpuFormat.DOUBLE + decoderService.add(List( - FADD_S -> (addSub :+ arg(0)), - FSUB_S -> (addSub :+ arg(1)), - FMADD_S -> (fma :+ arg(0)), - FMSUB_S -> (fma :+ arg(2)), - FNMADD_S -> (fma :+ arg(3)), - FNMSUB_S -> (fma :+ arg(1)), - FMUL_S -> (mul :+ arg(0)), - FDIV_S -> (div), - FSQRT_S -> (sqrt), - FLW -> (fl), - FSW -> (fs), - FCVT_S_WU -> (fcvtI2f :+ arg(0)), - FCVT_S_W -> (fcvtI2f :+ arg(1)), - FCVT_WU_S -> (fcvtF2i :+ arg(0)), - FCVT_W_S -> (fcvtF2i :+ arg(1)), - FCLASS_S -> (fclass), - FLE_S -> (fcmp :+ arg(0)), - FEQ_S -> (fcmp :+ arg(2)), - FLT_S -> (fcmp :+ arg(1)), - FSGNJ_S -> (fsgnj :+ arg(0)), - FSGNJN_S -> (fsgnj :+ arg(1)), - FSGNJX_S -> (fsgnj :+ arg(2)), - FMIN_S -> (fminMax :+ arg(0)), - FMAX_S -> (fminMax :+ arg(1)), - FMV_X_W -> (fmvXw), - FMV_W_X -> (fmvWx) + FADD_S -> (addSub :+ f32 :+ arg(0)), + FSUB_S -> (addSub :+ f32 :+ arg(1)), + FMADD_S -> (fma :+ f32 :+ arg(0)), + FMSUB_S -> (fma :+ f32 :+ arg(2)), + FNMADD_S -> (fma :+ f32 :+ arg(3)), + FNMSUB_S -> (fma :+ f32 :+ arg(1)), + FMUL_S -> (mul :+ f32 :+ arg(0)), + FDIV_S -> (div :+ f32 ), + FSQRT_S -> (sqrt :+ f32 ), + FLW -> (fl :+ f32 ), + FSW -> (fs :+ f32 ), + FCVT_S_WU -> (fcvtI2f :+ f32 :+ arg(0)), + FCVT_S_W -> (fcvtI2f :+ f32 :+ arg(1)), + FCVT_WU_S -> (fcvtF2i :+ f32 :+ arg(0)), + FCVT_W_S -> (fcvtF2i :+ f32 :+ arg(1)), + FCLASS_S -> (fclass :+ f32 ), + FLE_S -> (fcmp :+ f32 :+ arg(0)), + FEQ_S -> (fcmp :+ f32 :+ arg(2)), + FLT_S -> (fcmp :+ f32 :+ arg(1)), + FSGNJ_S -> (fsgnj :+ f32 :+ arg(0)), + FSGNJN_S -> (fsgnj :+ f32 :+ arg(1)), + FSGNJX_S -> (fsgnj :+ f32 :+ arg(2)), + FMIN_S -> (fminMax :+ f32 :+ arg(0)), + FMAX_S -> (fminMax :+ f32 :+ arg(1)), + FMV_X_W -> (fmvXw :+ f32 ), + FMV_W_X -> (fmvWx :+ f32 ) )) + if(p.withDouble){ + decoderService.add(List( + FADD_D -> (addSub :+ f64 :+ arg(0)), + FSUB_D -> (addSub :+ f64 :+ arg(1)), + FMADD_D -> (fma :+ f64 :+ arg(0)), + FMSUB_D -> (fma :+ f64 :+ arg(2)), + FNMADD_D -> (fma :+ f64 :+ arg(3)), + FNMSUB_D -> (fma :+ f64 :+ arg(1)), + FMUL_D -> (mul :+ f64 :+ arg(0)), + FDIV_D -> (div :+ f64 ), + FSQRT_D -> (sqrt :+ f64 ), + FLW -> (fl :+ f64 ), + FSW -> (fs :+ f64 ), + FCVT_S_WU -> (fcvtI2f :+ f64 :+ arg(0)), + FCVT_S_W -> (fcvtI2f :+ f64 :+ arg(1)), + FCVT_WU_D -> (fcvtF2i :+ f64 :+ arg(0)), + FCVT_W_D -> (fcvtF2i :+ f64 :+ arg(1)), + FCLASS_D -> (fclass :+ f64 ), + FLE_D -> (fcmp :+ f64 :+ arg(0)), + FEQ_D -> (fcmp :+ f64 :+ arg(2)), + FLT_D -> (fcmp :+ f64 :+ arg(1)), + FSGNJ_D -> (fsgnj :+ f64 :+ arg(0)), + FSGNJN_D -> (fsgnj :+ f64 :+ arg(1)), + FSGNJX_D -> (fsgnj :+ f64 :+ arg(2)), + FMIN_D -> (fminMax :+ f64 :+ arg(0)), + FMAX_D -> (fminMax :+ f64 :+ arg(1)), + FCVT_D_S -> (fcvtxx :+ f32), + FCVT_S_D -> (fcvtxx :+ f64) + )) + } //TODO FMV_X_X + doubles port = FpuPort(p) @@ -178,7 +214,7 @@ class FpuPlugin(externalFpu : Boolean = false, port.cmd.rs2 := input(INSTRUCTION)(rs2Range).asUInt port.cmd.rs3 := input(INSTRUCTION)(rs3Range).asUInt port.cmd.rd := input(INSTRUCTION)(rdRange).asUInt - port.cmd.format := FpuFormat.FLOAT + port.cmd.format := (if(p.withDouble) input(FPU_FORMAT) else FpuFormat.FLOAT()) port.cmd.roundMode := roundMode.as(FpuRoundMode()) insert(FPU_FORKED) := forked || port.cmd.fire diff --git a/src/test/scala/vexriscv/ip/fpu/FpuTest.scala b/src/test/scala/vexriscv/ip/fpu/FpuTest.scala index 2e88265..43265f8 100644 --- a/src/test/scala/vexriscv/ip/fpu/FpuTest.scala +++ b/src/test/scala/vexriscv/ip/fpu/FpuTest.scala @@ -55,7 +55,7 @@ class FpuTest extends FunSuite{ } def testP(p : FpuParameter){ - val portCount = 1 + val portCount = 4 val config = SimConfig config.allOptimisation @@ -121,13 +121,13 @@ class FpuTest extends FunSuite{ def f64_f64_i32 = { val str = next val s = new Scanner(str) - val a,b,c = (nextLong(s)) - (b2d(a), b2d(b), c, s.nextInt(16)) + val a,b = (nextLong(s)) + (b2d(a), b2d(b), s.nextInt(16), s.nextInt(16)) } def f64_f64 = { val s = new Scanner(next) - val a,b = (s.nextLong(16)) + val a,b = nextLong(s) (b2d(a), b2d(b), s.nextInt(16)) } @@ -501,6 +501,16 @@ class FpuTest extends FunSuite{ // if(ref + Float.MinPositiveValue*2.0f === dut || dut + Float.MinPositiveValue*2.0f === ref) false } + + def checkDouble(ref : Double, dut : Double): Boolean ={ + if((d2b(ref) & Long.MinValue) != (d2b(dut) & Long.MinValue)) return false + if(ref == 0.0 && dut == 0.0 && d2b(ref) != d2b(dut)) return false + if(ref.isNaN && dut.isNaN) return true + if(ref == dut) return true + if(ref.abs * 1.0001 + Float.MinPositiveValue >= dut.abs*0.9999 && ref.abs * 0.9999 - Double.MinPositiveValue <= dut.abs*1.0001) return true + // if(ref + Float.MinPositiveValue*2.0f === dut || dut + Float.MinPositiveValue*2.0f === ref) + false + } def checkFloatExact(ref : Float, dut : Float): Boolean ={ if(ref.signum != dut.signum === dut) return false if(ref.isNaN && dut.isNaN) return true @@ -514,6 +524,11 @@ class FpuTest extends FunSuite{ (Random.nextDouble() * (Math.pow(2.0, exp)) * (if(Random.nextBoolean()) -1.0 else 1.0)).toFloat } + def randomDouble(): Double ={ + val exp = Random.nextInt(10)-5 + (Random.nextDouble() * (Math.pow(2.0, exp)) * (if(Random.nextBoolean()) -1.0 else 1.0)) + } + def testBinaryOp(op : (Int,Int,Int,FpuRoundMode.E, FpuFormat.E) => Unit, a : Float, b : Float, ref : Float, flag : Int, rounding : FpuRoundMode.E, opName : String): Unit ={ val rs = new RegAllocator() @@ -538,7 +553,7 @@ class FpuTest extends FunSuite{ load(rs2, b) op(rd,rs1,rs2, rounding, FpuFormat.DOUBLE) store(rd){v => - assert(d2b(v) == d2b(ref), f"## ${a} ${opName} $b = $v, $ref $rounding") + assert(d2b(v) == d2b(ref), f"## ${a} ${opName} $b = $v, $ref $rounding, ${d2b(a).toString(16)} ${d2b(b).toString(16)} ${d2b(ref).toString(16)}") } flagMatch(flag, ref, f"## ${opName} ${a} $b $ref $rounding") @@ -609,7 +624,7 @@ class FpuTest extends FunSuite{ store(rd){v => assert(d2b(v) == d2b(ref), f"testCvtF32F64Raw $a $ref $rounding") } - flagMatch(flag, f"testCvtF32F64Raw $a $ref $rounding") + flagMatch(flag,ref, f"testCvtF32F64Raw $a $ref $rounding") } def testCvtF64F32Raw(a : Double, ref : Float, flag : Int, rounding : FpuRoundMode.E): Unit ={ @@ -619,7 +634,7 @@ class FpuTest extends FunSuite{ storeFloat(rd){v => assert(d2b(v) == d2b(ref), f"testCvtF64F32Raw $a $ref $rounding") } - flagMatch(flag, f"testCvtF64F32Raw $a $ref $rounding") + flagMatch(flag, ref, f"testCvtF64F32Raw $a $ref $rounding") } @@ -646,6 +661,30 @@ class FpuTest extends FunSuite{ } + def testClassF64Raw(a : Double) : Unit = { + val rd = Random.nextInt(32) + + + load(rd, a) + fclass(rd, FpuFormat.DOUBLE){v => + val mantissa = d2b(a) & 0x000FFFFFFFFFFFFFl + val exp = (d2b(a) >> 52) & 0x7FF + val sign = (d2b(a) >> 63) & 0x1 + + val refBit = if(a.isInfinite) (if(sign == 0) 7 else 0) + else if(a.isNaN) (if((mantissa >> 51) != 0) 9 else 8) + else if(exp == 0 && mantissa != 0) (if(sign == 0) 5 else 2) + else if(exp == 0 && mantissa == 0) (if(sign == 0) 4 else 3) + else if(sign == 0) 6 else 1 + + val ref = 1 << refBit + + assert(v == ref, f"fclass $a") + } + } + + + def testFmaRaw(a : Float, b : Float, c : Float): Unit ={ val rs = new RegAllocator() val rs1, rs2, rs3 = rs.allocate() @@ -663,6 +702,23 @@ class FpuTest extends FunSuite{ } + + def testFmaF64Raw(a : Double, b : Double, c : Double): Unit ={ + val rs = new RegAllocator() + val rs1, rs2, rs3 = rs.allocate() + val rd = Random.nextInt(32) + load(rs1, a) + load(rs2, b) + load(rs3, c) + + fma(rd,rs1,rs2,rs3, FpuRoundMode.RNE, FpuFormat.DOUBLE) + store(rd){v => + val ref = a.toDouble * b.toDouble + c.toDouble + val mul = a.toDouble * b.toDouble + if((mul.abs-c.abs)/mul.abs > 0.1) assert(checkDouble(ref, v), f"$a%.20f * $b%.20f + $c%.20f = $v%.20f, $ref%.20f") + } + } + def testSqrtExact(a : Float, ref : Float, flag : Int, rounding : FpuRoundMode.E): Unit ={ val rs = new RegAllocator() val rs1, rs2, rs3 = rs.allocate() @@ -690,6 +746,32 @@ class FpuTest extends FunSuite{ } } + def testSqrtF64Exact(a : Double, ref : Double, flag : Int, rounding : FpuRoundMode.E): Unit ={ + val rs = new RegAllocator() + val rs1, rs2, rs3 = rs.allocate() + val rd = Random.nextInt(32) + load(rs1, a) + + sqrt(rd,rs1, FpuRoundMode.RNE, FpuFormat.DOUBLE) + store(rd){v => + val error = Math.abs(ref-v)/ref + assert(checkDouble(ref, v), f"sqrt($a) = $v, $ref $error $rounding") + } + } + + def testDivF64Exact(a : Double, b : Double, ref : Double, flag : Int, rounding : FpuRoundMode.E): Unit ={ + val rs = new RegAllocator() + val rs1, rs2, rs3 = rs.allocate() + val rd = Random.nextInt(32) + load(rs1, a) + load(rs2, b) + + div(rd,rs1, rs2, FpuRoundMode.RNE, FpuFormat.DOUBLE) + store(rd){v => + val error = Math.abs(ref-v)/ref + assert(checkDouble(ref, v), f"div($a, $b) = $v, $ref $error $rounding") + } + } def testF2iExact(a : Float, ref : Int, flag : Int, signed : Boolean, rounding : FpuRoundMode.E): Unit ={ @@ -793,6 +875,23 @@ class FpuTest extends FunSuite{ def testEqRaw(a : Float, b : Float, ref : Int, flag : Int) = testCmpExact(a,b,ref,flag, 2) def testLtRaw(a : Float, b : Float, ref : Int, flag : Int) = testCmpExact(a,b,ref,flag, 1) + + def testCmpF64Exact(a : Double, b : Double, ref : Int, flag : Int, arg : Int): Unit ={ + val rs = new RegAllocator() + val rs1, rs2, rs3 = rs.allocate() + val rd = Random.nextInt(32) + load(rs1, a) + load(rs2, b) + cmp(rs1, rs2, arg, FpuFormat.DOUBLE){rsp => + val v = rsp.value.toBigInt.toInt + assert(v === ref, f"cmp($a, $b, $arg) = $v, $ref") + } + flagMatch(flag,f"$a < $b $ref $flag ${d2b(a)} ${d2b(b)}") + } + def testLeF64Raw(a : Double, b : Double, ref : Int, flag : Int) = testCmpF64Exact(a,b,ref,flag, 0) + def testEqF64Raw(a : Double, b : Double, ref : Int, flag : Int) = testCmpF64Exact(a,b,ref,flag, 2) + def testLtF64Raw(a : Double, b : Double, ref : Int, flag : Int) = testCmpF64Exact(a,b,ref,flag, 1) + // def testFmv_x_w(a : Float): Unit ={ // val rs = new RegAllocator() // val rs1, rs2, rs3 = rs.allocate() @@ -849,6 +948,35 @@ class FpuTest extends FunSuite{ def testMaxExact(a : Float, b : Float) : Unit = testMinMaxExact(a,b,1) + def testMinMaxF64Exact(a : Double, b : Double, arg : Int): Unit ={ + val rs = new RegAllocator() + val rs1, rs2 = rs.allocate() + val rd = Random.nextInt(32) + val ref = (a,b) match { + case _ if a.isNaN && b.isNaN => b2d(0x7ff8000000000000l) + case _ if a.isNaN => b + case _ if b.isNaN => a + case _ => if(arg == 0) Math.min(a,b) else Math.max(a,b) + } + val flag = (a,b) match { + case _ if a.isNaN && ((d2b(a) >> 51 ) & 1) == 0 => 16 + case _ if b.isNaN && ((d2b(b) >> 51 ) & 1) == 0 => 16 + case _ => 0 + } + load(rs1, a) + load(rs2, b) + + minMax(rd,rs1,rs2, arg, FpuFormat.DOUBLE) + store(rd){v => + assert(d2b(ref) == d2b(v), f"minMax($a $b $arg) = $v, $ref") + } + flagMatch(flag, f"minmax($a $b $arg)") + } + + def testMinF64Exact(a : Double, b : Double) : Unit = testMinMaxF64Exact(a,b,0) + def testMaxF64Exact(a : Double, b : Double) : Unit = testMinMaxF64Exact(a,b,1) + + def testSgnjRaw(a : Float, b : Float): Unit ={ val ref = b2f((f2b(a) & ~0x80000000) | f2b(b) & 0x80000000) testBinaryOp(sgnj,a,b,ref,0, null,"sgnj") @@ -862,6 +990,23 @@ class FpuTest extends FunSuite{ testBinaryOp(sgnjx,a,b,ref,0, null,"sgnjx") } + val f64SignMask = 1l << 63 + def testSgnjF64Raw(a : Double, b : Double): Unit ={ + var ref = b2d((d2b(a).toLong & ~f64SignMask) | d2b(b).toLong & f64SignMask) + if(d2b(a).toLong >> 32 == -1) ref = a + testBinaryOpF64(sgnj,a,b,ref,0, null,"sgnj") + } + def testSgnjnF64Raw(a : Double, b : Double): Unit ={ + var ref = b2d((d2b(a).toLong & ~f64SignMask) | ((d2b(b).toLong & f64SignMask) ^ f64SignMask)) + if(d2b(a).toLong >> 32 == -1) ref = a + testBinaryOpF64(sgnjn,a,b,ref,0, null,"sgnjn") + } + def testSgnjxF64Raw(a : Double, b : Double): Unit ={ + var ref = b2d(d2b(a).toLong ^ (d2b(b).toLong & f64SignMask)) + if(d2b(a).toLong >> 32 == -1) ref = a + testBinaryOpF64(sgnjx,a,b,ref,0, null,"sgnjx") + } + def withMinus(that : Seq[Float]) = that.flatMap(f => List(f, -f)) val fZeros = withMinus(List(0.0f)) @@ -887,25 +1032,46 @@ class FpuTest extends FunSuite{ } } - def testFma() : Unit = { + def testFmaF32() : Unit = { testFmaRaw(randomFloat(), randomFloat(), randomFloat()) flagClear() } - def testLe() : Unit = { + + def testFmaF64() : Unit = { + testFmaF64Raw(randomDouble(), randomDouble(), randomDouble()) + flagClear() + } + + def testLeF32() : Unit = { val (a,b,i,f) = f32.le.RAW.f32_f32_i32 testLeRaw(a,b,i, f) } - def testLt() : Unit = { + def testLtF32() : Unit = { val (a,b,i,f) = f32.lt.RAW.f32_f32_i32 testLtRaw(a,b,i, f) } - def testEq() : Unit = { + def testEqF32() : Unit = { val (a,b,i,f) = f32.eq.RAW.f32_f32_i32 testEqRaw(a,b,i, f) } + def testLeF64() : Unit = { + val (a,b,i,f) = f64.le.RAW.f64_f64_i32 + testLeF64Raw(a,b,i, f) + } + def testLtF64() : Unit = { + val (a,b,i,f) = f64.lt.RAW.f64_f64_i32 + testLtF64Raw(a,b,i, f) + } + + def testEqF64() : Unit = { + val (a,b,i,f) = f64.eq.RAW.f64_f64_i32 + testEqF64Raw(a,b,i, f) + } + + def testF2uiF32() : Unit = { val rounding = FpuRoundMode.elements.randomPick() val (a,b,f) = f32.f2ui(rounding).f32_i32 @@ -945,7 +1111,7 @@ class FpuTest extends FunSuite{ flagClear() } - def testSgnj() : Unit = { + def testSgnjF32() : Unit = { testSgnjRaw(b2f(Random.nextInt()), b2f(Random.nextInt())) testSgnjnRaw(b2f(Random.nextInt()), b2f(Random.nextInt())) testSgnjxRaw(b2f(Random.nextInt()), b2f(Random.nextInt())) @@ -955,6 +1121,31 @@ class FpuTest extends FunSuite{ testSgnjxRaw(a, b) } + def testDivF64() : Unit = { + val rounding = FpuRoundMode.elements.randomPick() + val (a,b,r,f) = f64.div(rounding).f64_f64_f64 + testDivF64Exact(a, b, r, f, rounding) + flagClear() + } + + def testSqrtF64() : Unit = { + val rounding = FpuRoundMode.elements.randomPick() + val (a,r,f) = f64.sqrt(rounding).f64_f64 + testSqrtF64Exact(a, r, f, rounding) + flagClear() + } + + def testSgnjF64() : Unit = { + testSgnjF64Raw(b2d(Random.nextLong()), b2d(Random.nextLong())) + testSgnjnF64Raw(b2d(Random.nextLong()), b2d(Random.nextLong())) + testSgnjxF64Raw(b2d(Random.nextLong()), b2d(Random.nextLong())) + val (a,b,r,f) = f64.sgnj.RAW.f64_f64_i32 + testSgnjF64Raw(a, b) + testSgnjnF64Raw(a, b) + testSgnjxF64Raw(a, b) + } + + def testTransferF32() : Unit = { val (a,b,r,f) = f32.transfer.RAW.f32_f32_i32 testTransferF32Raw(a, Random.nextBoolean(), Random.nextBoolean()) @@ -985,20 +1176,35 @@ class FpuTest extends FunSuite{ testCvtF64F32Raw(a, r, f, rounding) } - def testClass() : Unit = { + def testClassF32() : Unit = { val (a,b,r,f) = f32.fclass.RAW.f32_f32_i32 testClassRaw(a) } - def testMin() : Unit = { + def testMinF32() : Unit = { val (a,b,r,f) = f32.min.RAW.f32_f32_f32 testMinExact(a,b) } - def testMax() : Unit = { + def testMaxF32() : Unit = { val (a,b,r,f) = f32.max.RAW.f32_f32_f32 testMaxExact(a,b) } + def testClassF64() : Unit = { + val (a,b,r,f) = f64.fclass.RAW.f64_f64_i32 + testClassF64Raw(a) + } + + def testMinF64() : Unit = { + val (a,b,r,f) = f64.min.RAW.f64_f64_f64 + testMinF64Exact(a,b) + } + def testMaxF64() : Unit = { + val (a,b,r,f) = f64.max.RAW.f64_f64_f64 + testMaxF64Exact(a,b) + } + + def testUI2f32() : Unit = { val rounding = FpuRoundMode.elements.randomPick() val (a,b,f) = f32.i2f(rounding).i32_f32 @@ -1061,21 +1267,69 @@ class FpuTest extends FunSuite{ } - val f32Tests = List[() => Unit](testSubF32, testAddF32, testMulF32, testI2f32, testUI2f32, testMin, testMax, testSgnj, testTransferF32, testDiv, testSqrt, testF2iF32, testF2uiF32, testLe, testEq, testLt, testClass, testFma) + val f32Tests = List[() => Unit](testSubF32, testAddF32, testMulF32, testI2f32, testUI2f32, testMinF32, testMaxF32, testSgnjF32, testTransferF32, testDiv, testSqrt, testF2iF32, testF2uiF32, testLeF32, testEqF32, testLtF32, testClassF32, testFmaF32) + val f64Tests = List[() => Unit](testSubF64, testAddF64, testMulF64, testI2f64, testUI2f64, testMinF64, testMaxF64, testSgnjF64, testTransferF64, testDiv, testSqrt, testF2iF64, testF2uiF64, testLeF64, testEqF64, testLtF64, testClassF64, testFmaF64, testCvtF32F64, testCvtF64F32) + var fxxTests = f32Tests + if(p.withDouble) fxxTests ++= f64Tests + + //TODO test boxing //TODO double <-> simple convertions if(p.withDouble) { + for(_ <- 0 until 10000) testCvtF64F32() // 1 did not equal 3 Flag missmatch dut=1 ref=3 testCvtF64F32Raw 1.1754942807573643E-38 1.17549435E-38 RMM + println("FCVT_D_S done") for(_ <- 0 until 10000) testCvtF32F64() println("FCVT_S_D done") - for(_ <- 0 until 10000) testCvtF64F32() - println("FCVT_D_S done") + + for(_ <- 0 until 10000) testF2iF64() + println("f64 f2i done") + for(_ <- 0 until 10000) testF2uiF64() + println("f64 f2ui done") + + + for(_ <- 0 until 10000) testSgnjF64() + println("f64 sgnj done") + + + + for(_ <- 0 until 10000) testMinF64() + for(_ <- 0 until 10000) testMaxF64() + println("f64 minMax done") + + + + for(i <- 0 until 1000) testFmaF64() + flagClear() + println("f64 fma done") //TODO + + + for(_ <- 0 until 10000) testLeF64() + for(_ <- 0 until 10000) testLtF64() + for(_ <- 0 until 10000) testEqF64() + println("f64 Cmp done") + + + for(_ <- 0 until 10000) testDivF64() + println("f64 div done") + + for(_ <- 0 until 10000) testSqrtF64() + println("f64 sqrt done") + + for(_ <- 0 until 10000) testClassF64() + println("f64 class done") +// + + + + + for(_ <- 0 until 10000) testAddF64() for(_ <- 0 until 10000) testSubF64() - println("Add done") + println("f64 Add done") // testI2f64Exact(0x7FFFFFF5, 0x7FFFFFF5, 0, true, FpuRoundMode.RNE) @@ -1083,9 +1337,7 @@ class FpuTest extends FunSuite{ for(_ <- 0 until 10000) testI2f64() println("f64 i2f done") - for(_ <- 0 until 10000) testF2uiF64() - for(_ <- 0 until 10000) testF2iF64() - println("f64 f2i done") + // testF2iExact(1.0f,1, 0, false, FpuRoundMode.RTZ) // testF2iExact(2.0f,2, 0, false, FpuRoundMode.RTZ) @@ -1156,7 +1408,7 @@ class FpuTest extends FunSuite{ - for(i <- 0 until 1000) testFma() + for(i <- 0 until 1000) testFmaF32() flagClear() println("fma done") //TODO @@ -1166,9 +1418,9 @@ class FpuTest extends FunSuite{ testEqRaw(Float.PositiveInfinity,Float.PositiveInfinity,1, 0) testEqRaw(0f, 0f,1, 0) - for(_ <- 0 until 10000) testLe() - for(_ <- 0 until 10000) testLt() - for(_ <- 0 until 10000) testEq() + for(_ <- 0 until 10000) testLeF32() + for(_ <- 0 until 10000) testLtF32() + for(_ <- 0 until 10000) testEqF32() println("Cmp done") @@ -1178,16 +1430,16 @@ class FpuTest extends FunSuite{ for(_ <- 0 until 10000) testSqrt() println("f32 sqrt done") - for(_ <- 0 until 10000) testSgnj() + for(_ <- 0 until 10000) testSgnjF32() println("f32 sgnj done") - for(_ <- 0 until 10000) testClass() + for(_ <- 0 until 10000) testClassF32() println("f32 class done") - for(_ <- 0 until 10000) testMin() - for(_ <- 0 until 10000) testMax() + for(_ <- 0 until 10000) testMinF32() + for(_ <- 0 until 10000) testMaxF32() println("minMax done") @@ -1229,11 +1481,13 @@ class FpuTest extends FunSuite{ // dut.clockDomain.waitSampling(1000) // simSuccess() - for(i <- 0 until 1000) f32Tests.randomPick()() + for(i <- 0 until 10000) fxxTests.randomPick()() waitUntil(cpu.rspQueue.isEmpty) } + + stim.foreach(_.join()) dut.clockDomain.waitSampling(100) }