fpu f64/f32 pass all tests

This commit is contained in:
Dolu1990 2021-02-12 14:48:44 +01:00
parent 9a25a12879
commit 7d3b35c32c
3 changed files with 396 additions and 74 deletions

View File

@ -56,6 +56,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val arg = p.Arg() val arg = p.Arg()
val roundMode = FpuRoundMode() val roundMode = FpuRoundMode()
val format = p.withDouble generate FpuFormat() val format = p.withDouble generate FpuFormat()
val rs1Boxed, rs2Boxed = p.withDouble generate Bool()
} }
@ -79,6 +80,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val arg = Bits(2 bits) val arg = Bits(2 bits)
val roundMode = FpuRoundMode() val roundMode = FpuRoundMode()
val format = p.withDouble generate FpuFormat() val format = p.withDouble generate FpuFormat()
val rs1Boxed, rs2Boxed = p.withDouble generate Bool()
} }
case class MulInput() extends Bundle{ case class MulInput() extends Bundle{
@ -198,7 +200,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
//TODO nan boxing decoding //TODO nan boxing decoding
val read = new Area{ val read = new Area{
val arbiter = StreamArbiterFactory.noLock.lowerFirst.build(FpuCmd(p), portCount) val arbiter = StreamArbiterFactory.noLock.roundRobin.build(FpuCmd(p), portCount)
arbiter.io.inputs <> Vec(io.port.map(_.cmd)) arbiter.io.inputs <> Vec(io.port.map(_.cmd))
val s0 = Stream(RfReadInput()) val s0 = Stream(RfReadInput())
@ -208,7 +210,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val useRs1, useRs2, useRs3, useRd = False val useRs1, useRs2, useRs3, useRd = False
switch(s0.opcode){ switch(s0.opcode){
is(p.Opcode.LOAD) { useRd := True } is(p.Opcode.LOAD) { useRd := True }
is(p.Opcode.STORE) { useRs1 := True } is(p.Opcode.STORE) { useRs1 := True }
is(p.Opcode.ADD) { useRd := True; useRs1 := True; useRs2 := True } is(p.Opcode.ADD) { useRd := True; useRs1 := True; useRs2 := True }
is(p.Opcode.MUL) { useRd := True; useRs1 := True; useRs2 := True } is(p.Opcode.MUL) { useRd := True; useRs1 := True; useRs2 := True }
@ -261,20 +263,25 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
output.rs2 := rs2Entry.value output.rs2 := rs2Entry.value
output.rs3 := rs3Entry.value output.rs3 := rs3Entry.value
if(p.withDouble){ if(p.withDouble){
output.rs1Boxed := rs1Entry.boxed
output.rs2Boxed := rs2Entry.boxed
output.format := s1.format output.format := s1.format
val store = s1.opcode === FpuOpcode.STORE ||s1.opcode === FpuOpcode.FMV_X_W val store = s1.opcode === FpuOpcode.STORE ||s1.opcode === FpuOpcode.FMV_X_W
when(store){ //Pass through val sgnjBypass = s1.opcode === FpuOpcode.SGNJ && s1.format === FpuFormat.DOUBLE
output.format := rs1Entry.boxed ? FpuFormat.FLOAT | FpuFormat.DOUBLE when(!sgnjBypass) {
} elsewhen(s1.format === FpuFormat.FLOAT =/= rs1Entry.boxed){ when(store) { //Pass through
output.rs1.setNanQuiet output.format := rs1Entry.boxed ? FpuFormat.FLOAT | FpuFormat.DOUBLE
output.rs1.sign := False } elsewhen (s1.format === FpuFormat.FLOAT =/= rs1Entry.boxed) {
} output.rs1.setNanQuiet
when(s1.format === FpuFormat.FLOAT =/= rs2Entry.boxed){ output.rs1.sign := False
output.rs2.setNanQuiet }
output.rs2.sign := False when(s1.format === FpuFormat.FLOAT =/= rs2Entry.boxed) {
} output.rs2.setNanQuiet
when(s1.format === FpuFormat.FLOAT =/= rs3Entry.boxed){ output.rs2.sign := False
output.rs3.setNanQuiet }
when(s1.format === FpuFormat.FLOAT =/= rs3Entry.boxed) {
output.rs3.setNanQuiet
}
} }
} }
} }
@ -686,8 +693,11 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
) )
val result = (Mux(resign, ~unsigned, unsigned) + (resign ^ increment).asUInt) val result = (Mux(resign, ~unsigned, unsigned) + (resign ^ increment).asUInt)
val overflow = (input.rs1.exponent > (input.arg(0) ? U(exponentOne+30) | U(exponentOne+31)) || input.rs1.isInfinity) && !input.rs1.sign || input.rs1.isNan val overflow = (input.rs1.exponent > (input.arg(0) ? U(exponentOne+30) | U(exponentOne+31)) || input.rs1.isInfinity) && !input.rs1.sign || input.rs1.isNan
val underflow = (input.rs1.exponent > U(exponentOne+31) || input.arg(0) && unsigned.msb && unsigned(30 downto 0) =/= 0 || !input.arg(0) && (unsigned =/= 0 || increment) || input.rs1.isInfinity) && input.rs1.sign val underflow = (input.rs1.exponent > U(exponentOne+31) || input.arg(0) && unsigned.msb && (unsigned(30 downto 0) =/= 0 || increment) || !input.arg(0) && (unsigned =/= 0 || increment) || input.rs1.isInfinity) && input.rs1.sign
val isZero = input.rs1.isZero val isZero = input.rs1.isZero
if(p.withDouble){
overflow setWhen(!input.rs1.sign && increment && unsigned(30 downto 0).andR && (input.arg(0) || unsigned(31)))
}
when(isZero){ when(isZero){
result := 0 result := 0
} elsewhen(underflow || overflow) { } elsewhen(underflow || overflow) {
@ -720,7 +730,13 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val minMaxSelectNanQuiet = input.rs1.isNan && input.rs2.isNan val minMaxSelectNanQuiet = input.rs1.isNan && input.rs2.isNan
val cmpResult = B(rs1Smaller && !bothZero && !input.arg(1) || (rs1Equal || bothZero) && !input.arg(0)) val cmpResult = B(rs1Smaller && !bothZero && !input.arg(1) || (rs1Equal || bothZero) && !input.arg(0))
when(input.rs1.isNan || input.rs2.isNan) { cmpResult := 0 } when(input.rs1.isNan || input.rs2.isNan) { cmpResult := 0 }
val sgnjResult = (input.rs1.sign && input.arg(1)) ^ input.rs2.sign ^ input.arg(0) val sgnjRs1Sign = CombInit(input.rs1.sign)
val sgnjRs2Sign = CombInit(input.rs2.sign)
if(p.withDouble){
sgnjRs1Sign setWhen(input.rs1Boxed && input.format === FpuFormat.DOUBLE)
sgnjRs2Sign setWhen(input.rs2Boxed && input.format === FpuFormat.DOUBLE)
}
val sgnjResult = (sgnjRs1Sign && input.arg(1)) ^ sgnjRs2Sign ^ input.arg(0)
val fclassResult = B(0, 32 bits) val fclassResult = B(0, 32 bits)
val decoded = input.rs1.decode() val decoded = input.rs1.decode()
fclassResult(0) := input.rs1.sign && decoded.isInfinity fclassResult(0) := input.rs1.sign && decoded.isInfinity
@ -771,6 +787,22 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
} }
is(FpuOpcode.SGNJ){ is(FpuOpcode.SGNJ){
rfOutput.value.sign := sgnjResult rfOutput.value.sign := sgnjResult
if(p.withDouble) when(input.format === FpuFormat.DOUBLE){
when(input.rs1Boxed){
rfOutput.value.sign := input.rs1.sign
rfOutput.format := FpuFormat.FLOAT
}
// //kill boxing => F32 -> F64 NAN
// when(input.rs1Boxed && !sgnjResult){
// rfOutput.value.setNan
// rfOutput.value.mantissa.setAll()
// rfOutput.value.mantissa(31 downto 0) := input.rs1.sign ## input.rs1.exponent
// }
// //Spawn boxing => F64 NAN -> F32
// when(!input.rs1Boxed && input.rs1.exponent === exponentOne + 1024 && input.rs1.mantissa(32, 52-32 bits).andR && sgnjResult){
//
// }
}
} }
if(p.withDouble) is(FpuOpcode.FCVT_X_X){ if(p.withDouble) is(FpuOpcode.FCVT_X_X){
rfOutput.format := ((input.format === FpuFormat.FLOAT) ? FpuFormat.DOUBLE | FpuFormat.FLOAT) rfOutput.format := ((input.format === FpuFormat.FLOAT) ? FpuFormat.DOUBLE | FpuFormat.FLOAT)

View File

@ -17,6 +17,7 @@ class FpuPlugin(externalFpu : Boolean = false,
object FPU_FORKED extends Stageable(Bool()) object FPU_FORKED extends Stageable(Bool())
object FPU_OPCODE extends Stageable(FpuOpcode()) object FPU_OPCODE extends Stageable(FpuOpcode())
object FPU_ARG extends Stageable(Bits(2 bits)) object FPU_ARG extends Stageable(Bits(2 bits))
object FPU_FORMAT extends Stageable(FpuFormat())
var port : FpuPort = null var port : FpuPort = null
@ -49,6 +50,7 @@ class FpuPlugin(externalFpu : Boolean = false,
val fminMax = floatRfWrite :+ FPU_OPCODE -> FpuOpcode.MIN_MAX val fminMax = floatRfWrite :+ FPU_OPCODE -> FpuOpcode.MIN_MAX
val fmvWx = floatRfWrite :+ FPU_OPCODE -> FpuOpcode.FMV_W_X :+ RS1_USE -> True val fmvWx = floatRfWrite :+ FPU_OPCODE -> FpuOpcode.FMV_W_X :+ RS1_USE -> True
val fcvtI2f = floatRfWrite :+ FPU_OPCODE -> FpuOpcode.I2F :+ RS1_USE -> True val fcvtI2f = floatRfWrite :+ FPU_OPCODE -> FpuOpcode.I2F :+ RS1_USE -> True
val fcvtxx = floatRfWrite :+ FPU_OPCODE -> FpuOpcode.FCVT_X_X
val fcmp = intRfWrite :+ FPU_OPCODE -> FpuOpcode.CMP val fcmp = intRfWrite :+ FPU_OPCODE -> FpuOpcode.CMP
val fclass = intRfWrite :+ FPU_OPCODE -> FpuOpcode.FCLASS val fclass = intRfWrite :+ FPU_OPCODE -> FpuOpcode.FCLASS
@ -73,35 +75,69 @@ class FpuPlugin(externalFpu : Boolean = false,
def arg(v : Int) = FPU_ARG -> U(v, 2 bits) def arg(v : Int) = FPU_ARG -> U(v, 2 bits)
val decoderService = pipeline.service(classOf[DecoderService]) val decoderService = pipeline.service(classOf[DecoderService])
decoderService.addDefault(FPU_ENABLE, False) decoderService.addDefault(FPU_ENABLE, False)
val f32 = FPU_FORMAT -> FpuFormat.FLOAT
val f64 = FPU_FORMAT -> FpuFormat.DOUBLE
decoderService.add(List( decoderService.add(List(
FADD_S -> (addSub :+ arg(0)), FADD_S -> (addSub :+ f32 :+ arg(0)),
FSUB_S -> (addSub :+ arg(1)), FSUB_S -> (addSub :+ f32 :+ arg(1)),
FMADD_S -> (fma :+ arg(0)), FMADD_S -> (fma :+ f32 :+ arg(0)),
FMSUB_S -> (fma :+ arg(2)), FMSUB_S -> (fma :+ f32 :+ arg(2)),
FNMADD_S -> (fma :+ arg(3)), FNMADD_S -> (fma :+ f32 :+ arg(3)),
FNMSUB_S -> (fma :+ arg(1)), FNMSUB_S -> (fma :+ f32 :+ arg(1)),
FMUL_S -> (mul :+ arg(0)), FMUL_S -> (mul :+ f32 :+ arg(0)),
FDIV_S -> (div), FDIV_S -> (div :+ f32 ),
FSQRT_S -> (sqrt), FSQRT_S -> (sqrt :+ f32 ),
FLW -> (fl), FLW -> (fl :+ f32 ),
FSW -> (fs), FSW -> (fs :+ f32 ),
FCVT_S_WU -> (fcvtI2f :+ arg(0)), FCVT_S_WU -> (fcvtI2f :+ f32 :+ arg(0)),
FCVT_S_W -> (fcvtI2f :+ arg(1)), FCVT_S_W -> (fcvtI2f :+ f32 :+ arg(1)),
FCVT_WU_S -> (fcvtF2i :+ arg(0)), FCVT_WU_S -> (fcvtF2i :+ f32 :+ arg(0)),
FCVT_W_S -> (fcvtF2i :+ arg(1)), FCVT_W_S -> (fcvtF2i :+ f32 :+ arg(1)),
FCLASS_S -> (fclass), FCLASS_S -> (fclass :+ f32 ),
FLE_S -> (fcmp :+ arg(0)), FLE_S -> (fcmp :+ f32 :+ arg(0)),
FEQ_S -> (fcmp :+ arg(2)), FEQ_S -> (fcmp :+ f32 :+ arg(2)),
FLT_S -> (fcmp :+ arg(1)), FLT_S -> (fcmp :+ f32 :+ arg(1)),
FSGNJ_S -> (fsgnj :+ arg(0)), FSGNJ_S -> (fsgnj :+ f32 :+ arg(0)),
FSGNJN_S -> (fsgnj :+ arg(1)), FSGNJN_S -> (fsgnj :+ f32 :+ arg(1)),
FSGNJX_S -> (fsgnj :+ arg(2)), FSGNJX_S -> (fsgnj :+ f32 :+ arg(2)),
FMIN_S -> (fminMax :+ arg(0)), FMIN_S -> (fminMax :+ f32 :+ arg(0)),
FMAX_S -> (fminMax :+ arg(1)), FMAX_S -> (fminMax :+ f32 :+ arg(1)),
FMV_X_W -> (fmvXw), FMV_X_W -> (fmvXw :+ f32 ),
FMV_W_X -> (fmvWx) FMV_W_X -> (fmvWx :+ f32 )
)) ))
// Double-precision decodings, registered only when the FPU is generated with f64 support.
// Every entry reuses the shared per-operation description and tags it with FPU_FORMAT = DOUBLE.
// The load/store and int->float conversions must use the double-precision encodings
// (FLD/FSD/FCVT_D_W[U]); registering FLW/FSW/FCVT_S_W[U] again would collide with the
// single-precision table and leave the D variants undecoded.
if(p.withDouble){
  decoderService.add(List(
    FADD_D    -> (addSub  :+ f64 :+ arg(0)),
    FSUB_D    -> (addSub  :+ f64 :+ arg(1)),
    FMADD_D   -> (fma     :+ f64 :+ arg(0)),
    FMSUB_D   -> (fma     :+ f64 :+ arg(2)),
    FNMADD_D  -> (fma     :+ f64 :+ arg(3)),
    FNMSUB_D  -> (fma     :+ f64 :+ arg(1)),
    FMUL_D    -> (mul     :+ f64 :+ arg(0)),
    FDIV_D    -> (div     :+ f64 ),
    FSQRT_D   -> (sqrt    :+ f64 ),
    FLD       -> (fl      :+ f64 ),
    FSD       -> (fs      :+ f64 ),
    FCVT_D_WU -> (fcvtI2f :+ f64 :+ arg(0)),
    FCVT_D_W  -> (fcvtI2f :+ f64 :+ arg(1)),
    FCVT_WU_D -> (fcvtF2i :+ f64 :+ arg(0)),
    FCVT_W_D  -> (fcvtF2i :+ f64 :+ arg(1)),
    FCLASS_D  -> (fclass  :+ f64 ),
    FLE_D     -> (fcmp    :+ f64 :+ arg(0)),
    FEQ_D     -> (fcmp    :+ f64 :+ arg(2)),
    FLT_D     -> (fcmp    :+ f64 :+ arg(1)),
    FSGNJ_D   -> (fsgnj   :+ f64 :+ arg(0)),
    FSGNJN_D  -> (fsgnj   :+ f64 :+ arg(1)),
    FSGNJX_D  -> (fsgnj   :+ f64 :+ arg(2)),
    FMIN_D    -> (fminMax :+ f64 :+ arg(0)),
    FMAX_D    -> (fminMax :+ f64 :+ arg(1)),
    // Format conversions are tagged with the format of their source operand.
    FCVT_D_S  -> (fcvtxx  :+ f32),
    FCVT_S_D  -> (fcvtxx  :+ f64)
  ))
}
//TODO FMV_X_X + doubles //TODO FMV_X_X + doubles
port = FpuPort(p) port = FpuPort(p)
@ -178,7 +214,7 @@ class FpuPlugin(externalFpu : Boolean = false,
port.cmd.rs2 := input(INSTRUCTION)(rs2Range).asUInt port.cmd.rs2 := input(INSTRUCTION)(rs2Range).asUInt
port.cmd.rs3 := input(INSTRUCTION)(rs3Range).asUInt port.cmd.rs3 := input(INSTRUCTION)(rs3Range).asUInt
port.cmd.rd := input(INSTRUCTION)(rdRange).asUInt port.cmd.rd := input(INSTRUCTION)(rdRange).asUInt
port.cmd.format := FpuFormat.FLOAT port.cmd.format := (if(p.withDouble) input(FPU_FORMAT) else FpuFormat.FLOAT())
port.cmd.roundMode := roundMode.as(FpuRoundMode()) port.cmd.roundMode := roundMode.as(FpuRoundMode())
insert(FPU_FORKED) := forked || port.cmd.fire insert(FPU_FORKED) := forked || port.cmd.fire

View File

@ -55,7 +55,7 @@ class FpuTest extends FunSuite{
} }
def testP(p : FpuParameter){ def testP(p : FpuParameter){
val portCount = 1 val portCount = 4
val config = SimConfig val config = SimConfig
config.allOptimisation config.allOptimisation
@ -121,13 +121,13 @@ class FpuTest extends FunSuite{
def f64_f64_i32 = { def f64_f64_i32 = {
val str = next val str = next
val s = new Scanner(str) val s = new Scanner(str)
val a,b,c = (nextLong(s)) val a,b = (nextLong(s))
(b2d(a), b2d(b), c, s.nextInt(16)) (b2d(a), b2d(b), s.nextInt(16), s.nextInt(16))
} }
def f64_f64 = { def f64_f64 = {
val s = new Scanner(next) val s = new Scanner(next)
val a,b = (s.nextLong(16)) val a,b = nextLong(s)
(b2d(a), b2d(b), s.nextInt(16)) (b2d(a), b2d(b), s.nextInt(16))
} }
@ -501,6 +501,16 @@ class FpuTest extends FunSuite{
// if(ref + Float.MinPositiveValue*2.0f === dut || dut + Float.MinPositiveValue*2.0f === ref) // if(ref + Float.MinPositiveValue*2.0f === dut || dut + Float.MinPositiveValue*2.0f === ref)
false false
} }
/** Approximate equality check between a reference double and the value produced by the DUT.
  *
  * Accepts the pair when the sign bits match and either both are NaN, both compare equal,
  * or the magnitudes agree within a 0.01% relative tolerance plus one double-precision
  * minimal-subnormal of absolute slack on each side.
  *
  * @param ref expected value
  * @param dut value read back from the device under test
  * @return true when `dut` is an acceptable approximation of `ref`
  */
def checkDouble(ref : Double, dut : Double): Boolean ={
  // Absolute slack must be the *double* epsilon on both bounds; the single-precision one
  // (~1.4e-45) is huge at double scale and would accept any tiny-magnitude result.
  val eps = Double.MinPositiveValue
  if((d2b(ref) & Long.MinValue) != (d2b(dut) & Long.MinValue)) false      // sign bits differ
  else if(ref == 0.0 && dut == 0.0 && d2b(ref) != d2b(dut)) false         // +0 versus -0
  else if(ref.isNaN && dut.isNaN) true
  else if(ref == dut) true
  else ref.abs * 1.0001 + eps >= dut.abs * 0.9999 && ref.abs * 0.9999 - eps <= dut.abs * 1.0001
}
def checkFloatExact(ref : Float, dut : Float): Boolean ={ def checkFloatExact(ref : Float, dut : Float): Boolean ={
if(ref.signum != dut.signum === dut) return false if(ref.signum != dut.signum === dut) return false
if(ref.isNaN && dut.isNaN) return true if(ref.isNaN && dut.isNaN) return true
@ -514,6 +524,11 @@ class FpuTest extends FunSuite{
(Random.nextDouble() * (Math.pow(2.0, exp)) * (if(Random.nextBoolean()) -1.0 else 1.0)).toFloat (Random.nextDouble() * (Math.pow(2.0, exp)) * (if(Random.nextBoolean()) -1.0 else 1.0)).toFloat
} }
/** Draws a random double: a uniform value in [0,1) scaled by 2^e with e in [-5,4], with a random sign. */
def randomDouble(): Double ={
  // Draw order (exponent, magnitude, sign) is kept identical so seeded runs stay reproducible.
  val exponent = Random.nextInt(10) - 5
  val magnitude = Random.nextDouble()
  val scale = Math.pow(2.0, exponent)
  val sign = if(Random.nextBoolean()) -1.0 else 1.0
  magnitude * scale * sign
}
def testBinaryOp(op : (Int,Int,Int,FpuRoundMode.E, FpuFormat.E) => Unit, a : Float, b : Float, ref : Float, flag : Int, rounding : FpuRoundMode.E, opName : String): Unit ={ def testBinaryOp(op : (Int,Int,Int,FpuRoundMode.E, FpuFormat.E) => Unit, a : Float, b : Float, ref : Float, flag : Int, rounding : FpuRoundMode.E, opName : String): Unit ={
val rs = new RegAllocator() val rs = new RegAllocator()
@ -538,7 +553,7 @@ class FpuTest extends FunSuite{
load(rs2, b) load(rs2, b)
op(rd,rs1,rs2, rounding, FpuFormat.DOUBLE) op(rd,rs1,rs2, rounding, FpuFormat.DOUBLE)
store(rd){v => store(rd){v =>
assert(d2b(v) == d2b(ref), f"## ${a} ${opName} $b = $v, $ref $rounding") assert(d2b(v) == d2b(ref), f"## ${a} ${opName} $b = $v, $ref $rounding, ${d2b(a).toString(16)} ${d2b(b).toString(16)} ${d2b(ref).toString(16)}")
} }
flagMatch(flag, ref, f"## ${opName} ${a} $b $ref $rounding") flagMatch(flag, ref, f"## ${opName} ${a} $b $ref $rounding")
@ -609,7 +624,7 @@ class FpuTest extends FunSuite{
store(rd){v => store(rd){v =>
assert(d2b(v) == d2b(ref), f"testCvtF32F64Raw $a $ref $rounding") assert(d2b(v) == d2b(ref), f"testCvtF32F64Raw $a $ref $rounding")
} }
flagMatch(flag, f"testCvtF32F64Raw $a $ref $rounding") flagMatch(flag,ref, f"testCvtF32F64Raw $a $ref $rounding")
} }
def testCvtF64F32Raw(a : Double, ref : Float, flag : Int, rounding : FpuRoundMode.E): Unit ={ def testCvtF64F32Raw(a : Double, ref : Float, flag : Int, rounding : FpuRoundMode.E): Unit ={
@ -619,7 +634,7 @@ class FpuTest extends FunSuite{
storeFloat(rd){v => storeFloat(rd){v =>
assert(d2b(v) == d2b(ref), f"testCvtF64F32Raw $a $ref $rounding") assert(d2b(v) == d2b(ref), f"testCvtF64F32Raw $a $ref $rounding")
} }
flagMatch(flag, f"testCvtF64F32Raw $a $ref $rounding") flagMatch(flag, ref, f"testCvtF64F32Raw $a $ref $rounding")
} }
@ -646,6 +661,30 @@ class FpuTest extends FunSuite{
} }
/** Loads `a` into a random register, runs FCLASS in double format and checks the one-hot class bit.
  *
  * Expected bit: 0/7 for -/+ infinity, 8/9 for NaN with mantissa bit 51 clear/set,
  * 2/5 for -/+ values with zero exponent and non-zero mantissa, 3/4 for -/+ zero,
  * and 1/6 for every other -/+ value.
  */
def testClassF64Raw(a : Double) : Unit = {
  val rd = Random.nextInt(32)
  load(rd, a)
  fclass(rd, FpuFormat.DOUBLE){v =>
    val bits = d2b(a)
    val mantissa = bits & 0x000FFFFFFFFFFFFFl
    val exponent = (bits >> 52) & 0x7FF
    val negative = ((bits >> 63) & 0x1) != 0
    val zeroExponent = exponent == 0
    val refBit =
      if(a.isInfinite) { if(negative) 0 else 7 }
      else if(a.isNaN) { if((mantissa >> 51) != 0) 9 else 8 }
      else if(zeroExponent && mantissa != 0) { if(negative) 2 else 5 }
      else if(zeroExponent) { if(negative) 3 else 4 }
      else if(negative) 1
      else 6
    val ref = 1 << refBit
    assert(v == ref, f"fclass $a")
  }
}
def testFmaRaw(a : Float, b : Float, c : Float): Unit ={ def testFmaRaw(a : Float, b : Float, c : Float): Unit ={
val rs = new RegAllocator() val rs = new RegAllocator()
val rs1, rs2, rs3 = rs.allocate() val rs1, rs2, rs3 = rs.allocate()
@ -663,6 +702,23 @@ class FpuTest extends FunSuite{
} }
/** Issues a double-precision fused multiply-add (a * b + c, RNE) and approximately checks the result.
  *
  * The comparison is only performed when the product exceeds the addend's magnitude by more
  * than 10% of the product — presumably to skip cancellation-prone cases (TODO confirm).
  */
def testFmaF64Raw(a : Double, b : Double, c : Double): Unit ={
  val regs = new RegAllocator()
  val rs1, rs2, rs3 = regs.allocate()
  val rd = Random.nextInt(32)
  load(rs1, a)
  load(rs2, b)
  load(rs3, c)
  fma(rd,rs1,rs2,rs3, FpuRoundMode.RNE, FpuFormat.DOUBLE)
  store(rd){v =>
    val mul = a * b
    val ref = mul + c
    val productDominance = (mul.abs - c.abs) / mul.abs
    if(productDominance > 0.1) {
      assert(checkDouble(ref, v), f"$a%.20f * $b%.20f + $c%.20f = $v%.20f, $ref%.20f")
    }
  }
}
def testSqrtExact(a : Float, ref : Float, flag : Int, rounding : FpuRoundMode.E): Unit ={ def testSqrtExact(a : Float, ref : Float, flag : Int, rounding : FpuRoundMode.E): Unit ={
val rs = new RegAllocator() val rs = new RegAllocator()
val rs1, rs2, rs3 = rs.allocate() val rs1, rs2, rs3 = rs.allocate()
@ -690,6 +746,32 @@ class FpuTest extends FunSuite{
} }
} }
/** Runs a double-precision square root on `a` and approximately compares the result with `ref`.
  *
  * NOTE(review): the operation is always issued with RNE and `flag` is never checked;
  * `rounding` only appears in the failure message. Confirm this is intentional.
  */
def testSqrtF64Exact(a : Double, ref : Double, flag : Int, rounding : FpuRoundMode.E): Unit ={
  val regs = new RegAllocator()
  // Three registers are allocated as in the other test helpers; only the first is used here.
  val src, spare2, spare3 = regs.allocate()
  val rd = Random.nextInt(32)
  load(src, a)
  sqrt(rd, src, FpuRoundMode.RNE, FpuFormat.DOUBLE)
  store(rd){dut =>
    val error = Math.abs(ref - dut) / ref
    assert(checkDouble(ref, dut), f"sqrt($a) = $dut, $ref $error $rounding")
  }
}
/** Runs a double-precision division a / b and approximately compares the result with `ref`.
  *
  * NOTE(review): the operation is always issued with RNE and `flag` is never checked;
  * `rounding` only appears in the failure message. Confirm this is intentional.
  */
def testDivF64Exact(a : Double, b : Double, ref : Double, flag : Int, rounding : FpuRoundMode.E): Unit ={
  val regs = new RegAllocator()
  // Three registers are allocated as in the other test helpers; only the first two are used here.
  val dividend, divisor, spare = regs.allocate()
  val rd = Random.nextInt(32)
  load(dividend, a)
  load(divisor, b)
  div(rd, dividend, divisor, FpuRoundMode.RNE, FpuFormat.DOUBLE)
  store(rd){dut =>
    val error = Math.abs(ref - dut) / ref
    assert(checkDouble(ref, dut), f"div($a, $b) = $dut, $ref $error $rounding")
  }
}
def testF2iExact(a : Float, ref : Int, flag : Int, signed : Boolean, rounding : FpuRoundMode.E): Unit ={ def testF2iExact(a : Float, ref : Int, flag : Int, signed : Boolean, rounding : FpuRoundMode.E): Unit ={
@ -793,6 +875,23 @@ class FpuTest extends FunSuite{
def testEqRaw(a : Float, b : Float, ref : Int, flag : Int) = testCmpExact(a,b,ref,flag, 2) def testEqRaw(a : Float, b : Float, ref : Int, flag : Int) = testCmpExact(a,b,ref,flag, 2)
def testLtRaw(a : Float, b : Float, ref : Int, flag : Int) = testCmpExact(a,b,ref,flag, 1) def testLtRaw(a : Float, b : Float, ref : Int, flag : Int) = testCmpExact(a,b,ref,flag, 1)
/** Loads `a` and `b`, issues a double-precision compare selected by `arg`, and checks both the
  * integer result and the accumulated exception flags against the reference values.
  */
def testCmpF64Exact(a : Double, b : Double, ref : Int, flag : Int, arg : Int): Unit ={
  val regs = new RegAllocator()
  val lhs, rhs, spare = regs.allocate()
  // Drawn but unused by the compare itself; kept so the random sequence is unchanged.
  val rd = Random.nextInt(32)
  load(lhs, a)
  load(rhs, b)
  cmp(lhs, rhs, arg, FpuFormat.DOUBLE){rsp =>
    val v = rsp.value.toBigInt.toInt
    assert(v === ref, f"cmp($a, $b, $arg) = $v, $ref")
  }
  flagMatch(flag,f"$a < $b $ref $flag ${d2b(a)} ${d2b(b)}")
}
// Compare variants: arg 0 = less-or-equal, arg 2 = equal, arg 1 = less-than.
def testLeF64Raw(a : Double, b : Double, ref : Int, flag : Int) = testCmpF64Exact(a, b, ref, flag, arg = 0)
def testEqF64Raw(a : Double, b : Double, ref : Int, flag : Int) = testCmpF64Exact(a, b, ref, flag, arg = 2)
def testLtF64Raw(a : Double, b : Double, ref : Int, flag : Int) = testCmpF64Exact(a, b, ref, flag, arg = 1)
// def testFmv_x_w(a : Float): Unit ={ // def testFmv_x_w(a : Float): Unit ={
// val rs = new RegAllocator() // val rs = new RegAllocator()
// val rs1, rs2, rs3 = rs.allocate() // val rs1, rs2, rs3 = rs.allocate()
@ -849,6 +948,35 @@ class FpuTest extends FunSuite{
def testMaxExact(a : Float, b : Float) : Unit = testMinMaxExact(a,b,1) def testMaxExact(a : Float, b : Float) : Unit = testMinMaxExact(a,b,1)
/** Checks the double-precision min (`arg` == 0) / max (otherwise) operation bit-exactly.
  *
  * Reference result: the fixed NaN pattern 0x7ff8000000000000 when both inputs are NaN, the
  * other operand when exactly one is NaN, else `Math.min` / `Math.max`.
  * Reference flags: 16 when either input is a NaN whose mantissa bit 51 is clear, else 0.
  */
def testMinMaxF64Exact(a : Double, b : Double, arg : Int): Unit ={
  val regs = new RegAllocator()
  val rs1, rs2 = regs.allocate()
  val rd = Random.nextInt(32)

  // A NaN with bit 51 cleared is the case that raises the flag.
  def nanWithBit51Clear(x : Double) = x.isNaN && ((d2b(x) >> 51 ) & 1) == 0

  val ref =
    if(a.isNaN && b.isNaN) b2d(0x7ff8000000000000l)
    else if(a.isNaN) b
    else if(b.isNaN) a
    else if(arg == 0) Math.min(a,b)
    else Math.max(a,b)
  val flag = if(nanWithBit51Clear(a) || nanWithBit51Clear(b)) 16 else 0

  load(rs1, a)
  load(rs2, b)
  minMax(rd,rs1,rs2, arg, FpuFormat.DOUBLE)
  store(rd){v =>
    assert(d2b(ref) == d2b(v), f"minMax($a $b $arg) = $v, $ref")
  }
  flagMatch(flag, f"minmax($a $b $arg)")
}
def testMinF64Exact(a : Double, b : Double) : Unit = testMinMaxF64Exact(a, b, arg = 0)
def testMaxF64Exact(a : Double, b : Double) : Unit = testMinMaxF64Exact(a, b, arg = 1)
def testSgnjRaw(a : Float, b : Float): Unit ={ def testSgnjRaw(a : Float, b : Float): Unit ={
val ref = b2f((f2b(a) & ~0x80000000) | f2b(b) & 0x80000000) val ref = b2f((f2b(a) & ~0x80000000) | f2b(b) & 0x80000000)
testBinaryOp(sgnj,a,b,ref,0, null,"sgnj") testBinaryOp(sgnj,a,b,ref,0, null,"sgnj")
@ -862,6 +990,23 @@ class FpuTest extends FunSuite{
testBinaryOp(sgnjx,a,b,ref,0, null,"sgnjx") testBinaryOp(sgnjx,a,b,ref,0, null,"sgnjx")
} }
// Bit 63: the sign bit of a 64-bit double.
val f64SignMask = 1L << 63

// Picks the expected sign-injection result: when the upper 32 bits of `a` are all ones
// the operand itself is expected back unchanged, otherwise the computed bit pattern is used.
def sgnjF64Reference(a : Double, injectedBits : Long) : Double = {
  val computed = b2d(injectedBits)
  if(d2b(a).toLong >> 32 == -1) a else computed
}

/** FSGNJ.D: magnitude of `a` with the sign of `b`. */
def testSgnjF64Raw(a : Double, b : Double): Unit ={
  val bits = (d2b(a).toLong & ~f64SignMask) | (d2b(b).toLong & f64SignMask)
  testBinaryOpF64(sgnj,a,b,sgnjF64Reference(a, bits),0, null,"sgnj")
}

/** FSGNJN.D: magnitude of `a` with the inverted sign of `b`. */
def testSgnjnF64Raw(a : Double, b : Double): Unit ={
  val bits = (d2b(a).toLong & ~f64SignMask) | (~d2b(b).toLong & f64SignMask)
  testBinaryOpF64(sgnjn,a,b,sgnjF64Reference(a, bits),0, null,"sgnjn")
}

/** FSGNJX.D: `a` with its sign XORed with the sign of `b`. */
def testSgnjxF64Raw(a : Double, b : Double): Unit ={
  val bits = d2b(a).toLong ^ (d2b(b).toLong & f64SignMask)
  testBinaryOpF64(sgnjx,a,b,sgnjF64Reference(a, bits),0, null,"sgnjx")
}
def withMinus(that : Seq[Float]) = that.flatMap(f => List(f, -f)) def withMinus(that : Seq[Float]) = that.flatMap(f => List(f, -f))
val fZeros = withMinus(List(0.0f)) val fZeros = withMinus(List(0.0f))
@ -887,25 +1032,46 @@ class FpuTest extends FunSuite{
} }
} }
def testFma() : Unit = { def testFmaF32() : Unit = {
testFmaRaw(randomFloat(), randomFloat(), randomFloat()) testFmaRaw(randomFloat(), randomFloat(), randomFloat())
flagClear() flagClear()
} }
def testLe() : Unit = {
/** One randomized double-precision FMA check, followed by clearing the accumulated flags. */
def testFmaF64() : Unit = {
  // Operands are drawn in a, b, c order, matching left-to-right argument evaluation.
  val a = randomDouble()
  val b = randomDouble()
  val c = randomDouble()
  testFmaF64Raw(a, b, c)
  flagClear()
}
def testLeF32() : Unit = {
val (a,b,i,f) = f32.le.RAW.f32_f32_i32 val (a,b,i,f) = f32.le.RAW.f32_f32_i32
testLeRaw(a,b,i, f) testLeRaw(a,b,i, f)
} }
def testLt() : Unit = { def testLtF32() : Unit = {
val (a,b,i,f) = f32.lt.RAW.f32_f32_i32 val (a,b,i,f) = f32.lt.RAW.f32_f32_i32
testLtRaw(a,b,i, f) testLtRaw(a,b,i, f)
} }
def testEq() : Unit = { def testEqF32() : Unit = {
val (a,b,i,f) = f32.eq.RAW.f32_f32_i32 val (a,b,i,f) = f32.eq.RAW.f32_f32_i32
testEqRaw(a,b,i, f) testEqRaw(a,b,i, f)
} }
/** Runs one less-or-equal test vector (operands, expected result, expected flags) in double format. */
def testLeF64() : Unit = {
  val (lhs, rhs, expected, flags) = f64.le.RAW.f64_f64_i32
  testLeF64Raw(lhs, rhs, expected, flags)
}
/** Runs one less-than test vector in double format. */
def testLtF64() : Unit = {
  val (lhs, rhs, expected, flags) = f64.lt.RAW.f64_f64_i32
  testLtF64Raw(lhs, rhs, expected, flags)
}
/** Runs one equality test vector in double format. */
def testEqF64() : Unit = {
  val (lhs, rhs, expected, flags) = f64.eq.RAW.f64_f64_i32
  testEqF64Raw(lhs, rhs, expected, flags)
}
def testF2uiF32() : Unit = { def testF2uiF32() : Unit = {
val rounding = FpuRoundMode.elements.randomPick() val rounding = FpuRoundMode.elements.randomPick()
val (a,b,f) = f32.f2ui(rounding).f32_i32 val (a,b,f) = f32.f2ui(rounding).f32_i32
@ -945,7 +1111,7 @@ class FpuTest extends FunSuite{
flagClear() flagClear()
} }
def testSgnj() : Unit = { def testSgnjF32() : Unit = {
testSgnjRaw(b2f(Random.nextInt()), b2f(Random.nextInt())) testSgnjRaw(b2f(Random.nextInt()), b2f(Random.nextInt()))
testSgnjnRaw(b2f(Random.nextInt()), b2f(Random.nextInt())) testSgnjnRaw(b2f(Random.nextInt()), b2f(Random.nextInt()))
testSgnjxRaw(b2f(Random.nextInt()), b2f(Random.nextInt())) testSgnjxRaw(b2f(Random.nextInt()), b2f(Random.nextInt()))
@ -955,6 +1121,31 @@ class FpuTest extends FunSuite{
testSgnjxRaw(a, b) testSgnjxRaw(a, b)
} }
/** Picks a random rounding mode, runs one double-precision division vector, then clears the flags. */
def testDivF64() : Unit = {
  val mode = FpuRoundMode.elements.randomPick()
  val (dividend, divisor, expected, flags) = f64.div(mode).f64_f64_f64
  testDivF64Exact(dividend, divisor, expected, flags, mode)
  flagClear()
}
/** Picks a random rounding mode, runs one double-precision square-root vector, then clears the flags. */
def testSqrtF64() : Unit = {
  val mode = FpuRoundMode.elements.randomPick()
  val (operand, expected, flags) = f64.sqrt(mode).f64_f64
  testSqrtF64Exact(operand, expected, flags, mode)
  flagClear()
}
/** Exercises the three double-precision sign-injection variants, first on random bit patterns
  * and then on the operands of one vector from the sgnj test file.
  */
def testSgnjF64() : Unit = {
  // Fresh random bit pattern per use; evaluated at each call site so the draws stay
  // interleaved with the checks exactly as before.
  def randomBits = b2d(Random.nextLong())
  testSgnjF64Raw(randomBits, randomBits)
  testSgnjnF64Raw(randomBits, randomBits)
  testSgnjxF64Raw(randomBits, randomBits)

  // Only the two operands of the vector are used; its result and flag fields are ignored.
  val (a, b, _, _) = f64.sgnj.RAW.f64_f64_i32
  testSgnjF64Raw(a, b)
  testSgnjnF64Raw(a, b)
  testSgnjxF64Raw(a, b)
}
def testTransferF32() : Unit = { def testTransferF32() : Unit = {
val (a,b,r,f) = f32.transfer.RAW.f32_f32_i32 val (a,b,r,f) = f32.transfer.RAW.f32_f32_i32
testTransferF32Raw(a, Random.nextBoolean(), Random.nextBoolean()) testTransferF32Raw(a, Random.nextBoolean(), Random.nextBoolean())
@ -985,20 +1176,35 @@ class FpuTest extends FunSuite{
testCvtF64F32Raw(a, r, f, rounding) testCvtF64F32Raw(a, r, f, rounding)
} }
def testClass() : Unit = { def testClassF32() : Unit = {
val (a,b,r,f) = f32.fclass.RAW.f32_f32_i32 val (a,b,r,f) = f32.fclass.RAW.f32_f32_i32
testClassRaw(a) testClassRaw(a)
} }
def testMin() : Unit = { def testMinF32() : Unit = {
val (a,b,r,f) = f32.min.RAW.f32_f32_f32 val (a,b,r,f) = f32.min.RAW.f32_f32_f32
testMinExact(a,b) testMinExact(a,b)
} }
def testMax() : Unit = { def testMaxF32() : Unit = {
val (a,b,r,f) = f32.max.RAW.f32_f32_f32 val (a,b,r,f) = f32.max.RAW.f32_f32_f32
testMaxExact(a,b) testMaxExact(a,b)
} }
/** Classifies the first operand of one fclass vector; the remaining vector fields are ignored. */
def testClassF64() : Unit = {
  val (operand, _, _, _) = f64.fclass.RAW.f64_f64_i32
  testClassF64Raw(operand)
}
/** Runs the min check on the operands of one vector; the reference is recomputed by the helper. */
def testMinF64() : Unit = {
  val (lhs, rhs, _, _) = f64.min.RAW.f64_f64_f64
  testMinF64Exact(lhs, rhs)
}
/** Runs the max check on the operands of one vector; the reference is recomputed by the helper. */
def testMaxF64() : Unit = {
  val (lhs, rhs, _, _) = f64.max.RAW.f64_f64_f64
  testMaxF64Exact(lhs, rhs)
}
def testUI2f32() : Unit = { def testUI2f32() : Unit = {
val rounding = FpuRoundMode.elements.randomPick() val rounding = FpuRoundMode.elements.randomPick()
val (a,b,f) = f32.i2f(rounding).i32_f32 val (a,b,f) = f32.i2f(rounding).i32_f32
@ -1061,21 +1267,69 @@ class FpuTest extends FunSuite{
} }
val f32Tests = List[() => Unit](testSubF32, testAddF32, testMulF32, testI2f32, testUI2f32, testMin, testMax, testSgnj, testTransferF32, testDiv, testSqrt, testF2iF32, testF2uiF32, testLe, testEq, testLt, testClass, testFma) val f32Tests = List[() => Unit](testSubF32, testAddF32, testMulF32, testI2f32, testUI2f32, testMinF32, testMaxF32, testSgnjF32, testTransferF32, testDiv, testSqrt, testF2iF32, testF2uiF32, testLeF32, testEqF32, testLtF32, testClassF32, testFmaF32)
val f64Tests = List[() => Unit](testSubF64, testAddF64, testMulF64, testI2f64, testUI2f64, testMinF64, testMaxF64, testSgnjF64, testTransferF64, testDiv, testSqrt, testF2iF64, testF2uiF64, testLeF64, testEqF64, testLtF64, testClassF64, testFmaF64, testCvtF32F64, testCvtF64F32)
var fxxTests = f32Tests
if(p.withDouble) fxxTests ++= f64Tests
//TODO test boxing //TODO test boxing
//TODO double <-> simple convertions //TODO double <-> simple convertions
if(p.withDouble) { if(p.withDouble) {
for(_ <- 0 until 10000) testCvtF64F32() // 1 did not equal 3 Flag missmatch dut=1 ref=3 testCvtF64F32Raw 1.1754942807573643E-38 1.17549435E-38 RMM
println("FCVT_D_S done")
for(_ <- 0 until 10000) testCvtF32F64() for(_ <- 0 until 10000) testCvtF32F64()
println("FCVT_S_D done") println("FCVT_S_D done")
for(_ <- 0 until 10000) testCvtF64F32()
println("FCVT_D_S done") for(_ <- 0 until 10000) testF2iF64()
println("f64 f2i done")
for(_ <- 0 until 10000) testF2uiF64()
println("f64 f2ui done")
for(_ <- 0 until 10000) testSgnjF64()
println("f64 sgnj done")
for(_ <- 0 until 10000) testMinF64()
for(_ <- 0 until 10000) testMaxF64()
println("f64 minMax done")
for(i <- 0 until 1000) testFmaF64()
flagClear()
println("f64 fma done") //TODO
for(_ <- 0 until 10000) testLeF64()
for(_ <- 0 until 10000) testLtF64()
for(_ <- 0 until 10000) testEqF64()
println("f64 Cmp done")
for(_ <- 0 until 10000) testDivF64()
println("f64 div done")
for(_ <- 0 until 10000) testSqrtF64()
println("f64 sqrt done")
for(_ <- 0 until 10000) testClassF64()
println("f64 class done")
//
for(_ <- 0 until 10000) testAddF64() for(_ <- 0 until 10000) testAddF64()
for(_ <- 0 until 10000) testSubF64() for(_ <- 0 until 10000) testSubF64()
println("Add done") println("f64 Add done")
// testI2f64Exact(0x7FFFFFF5, 0x7FFFFFF5, 0, true, FpuRoundMode.RNE) // testI2f64Exact(0x7FFFFFF5, 0x7FFFFFF5, 0, true, FpuRoundMode.RNE)
@ -1083,9 +1337,7 @@ class FpuTest extends FunSuite{
for(_ <- 0 until 10000) testI2f64() for(_ <- 0 until 10000) testI2f64()
println("f64 i2f done") println("f64 i2f done")
for(_ <- 0 until 10000) testF2uiF64()
for(_ <- 0 until 10000) testF2iF64()
println("f64 f2i done")
// testF2iExact(1.0f,1, 0, false, FpuRoundMode.RTZ) // testF2iExact(1.0f,1, 0, false, FpuRoundMode.RTZ)
// testF2iExact(2.0f,2, 0, false, FpuRoundMode.RTZ) // testF2iExact(2.0f,2, 0, false, FpuRoundMode.RTZ)
@ -1156,7 +1408,7 @@ class FpuTest extends FunSuite{
for(i <- 0 until 1000) testFma() for(i <- 0 until 1000) testFmaF32()
flagClear() flagClear()
println("fma done") //TODO println("fma done") //TODO
@ -1166,9 +1418,9 @@ class FpuTest extends FunSuite{
testEqRaw(Float.PositiveInfinity,Float.PositiveInfinity,1, 0) testEqRaw(Float.PositiveInfinity,Float.PositiveInfinity,1, 0)
testEqRaw(0f, 0f,1, 0) testEqRaw(0f, 0f,1, 0)
for(_ <- 0 until 10000) testLe() for(_ <- 0 until 10000) testLeF32()
for(_ <- 0 until 10000) testLt() for(_ <- 0 until 10000) testLtF32()
for(_ <- 0 until 10000) testEq() for(_ <- 0 until 10000) testEqF32()
println("Cmp done") println("Cmp done")
@ -1178,16 +1430,16 @@ class FpuTest extends FunSuite{
for(_ <- 0 until 10000) testSqrt() for(_ <- 0 until 10000) testSqrt()
println("f32 sqrt done") println("f32 sqrt done")
for(_ <- 0 until 10000) testSgnj() for(_ <- 0 until 10000) testSgnjF32()
println("f32 sgnj done") println("f32 sgnj done")
for(_ <- 0 until 10000) testClass() for(_ <- 0 until 10000) testClassF32()
println("f32 class done") println("f32 class done")
for(_ <- 0 until 10000) testMin() for(_ <- 0 until 10000) testMinF32()
for(_ <- 0 until 10000) testMax() for(_ <- 0 until 10000) testMaxF32()
println("minMax done") println("minMax done")
@ -1229,11 +1481,13 @@ class FpuTest extends FunSuite{
// dut.clockDomain.waitSampling(1000) // dut.clockDomain.waitSampling(1000)
// simSuccess() // simSuccess()
for(i <- 0 until 1000) f32Tests.randomPick()() for(i <- 0 until 10000) fxxTests.randomPick()()
waitUntil(cpu.rspQueue.isEmpty) waitUntil(cpu.rspQueue.isEmpty)
} }
stim.foreach(_.join()) stim.foreach(_.join())
dut.clockDomain.waitSampling(100) dut.clockDomain.waitSampling(100)
} }