fpu f64 load/store/mv/mul seems ok

This commit is contained in:
Dolu1990 2021-02-11 16:07:47 +01:00
parent e97c2de837
commit b6eda1ad7a
2 changed files with 346 additions and 166 deletions

View File

@ -21,6 +21,8 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val exponentOne = (1 << p.internalExponentSize-1) - 1
val exponentF32Subnormal = exponentOne-127
val exponentF64Subnormal = exponentOne-1023
val exponentF32Infinity = exponentOne+127+1
val exponentF64Infinity = exponentOne+1023+1
val rfLockCount = 5
val lockIdType = HardType(UInt(log2Up(rfLockCount) bits))
@ -30,6 +32,11 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
if(!p.withDouble) no
}
/**
 * Picks one of two values according to the floating-point format.
 *
 * When the core is built without double support (`p.withDouble` false) the
 * `no` value is returned directly and `yes` is never evaluated. Otherwise a
 * mux is built that yields `yes` when `format` is DOUBLE and `no` otherwise.
 *
 * @param format format selector of the operation being processed
 * @param yes    value used for DOUBLE (by-name)
 * @param no     value used for every other format (by-name)
 */
def muxDouble[T <: Data](format : FpuFormat.C)(yes : => T)(no : => T): T ={
  if(!p.withDouble) no
  else ((format === FpuFormat.DOUBLE) ? { yes } | { no })
}
case class RfReadInput() extends Bundle{
val source = Source()
val opcode = p.Opcode()
@ -254,11 +261,16 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
output.rs3 := rs3Entry.value
if(p.withDouble){
output.format := s1.format
when(s1.format === FpuFormat.FLOAT =/= rs1Entry.boxed){
val store = s1.opcode === FpuOpcode.STORE ||s1.opcode === FpuOpcode.FMV_X_W
when(store){ //Pass through
output.format := rs1Entry.boxed ? FpuFormat.FLOAT | FpuFormat.DOUBLE
} elsewhen(s1.format === FpuFormat.FLOAT =/= rs1Entry.boxed){
output.rs1.setNanQuiet
output.rs1.sign := False
}
when(s1.format === FpuFormat.FLOAT =/= rs2Entry.boxed){
output.rs2.setNanQuiet
output.rs2.sign := False
}
when(s1.format === FpuFormat.FLOAT =/= rs3Entry.boxed){
output.rs3.setNanQuiet
@ -364,7 +376,13 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
output.i2f := input.i2f
output.arg := input.arg
output.roundMode := input.roundMode
if(p.withDouble) output.format := input.format
if(p.withDouble) {
output.format := input.format
when(!input.i2f && input.format === FpuFormat.DOUBLE && output.value(63 downto 32).andR){ //Detect boxing
output.format := FpuFormat.FLOAT
}
}
}
val s1 = new Area{
@ -378,25 +396,34 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
}
val f64 = p.withDouble generate new Area{
val mantissa = input.value(0, 52 bits).asUInt
val exponent = input.value(11, 52 bits).asUInt
val exponent = input.value(52, 11 bits).asUInt
val sign = input.value(63)
}
val recodedExpOffset = UInt(p.internalExponentSize bits)
val passThroughFloat = p.internalFloating()
passThroughFloat.special := False
passThroughFloat.sign := f32.sign
passThroughFloat.exponent := f32.exponent.resized
passThroughFloat.mantissa := f32.mantissa << (if(p.withDouble) 29 else 0)
if(p.withDouble) when(input.format === FpuFormat.DOUBLE){
whenDouble(input.format){
passThroughFloat.sign := f64.sign
passThroughFloat.exponent := f64.exponent.resized
passThroughFloat.mantissa := f64.mantissa
recodedExpOffset := exponentF64Subnormal
} {
passThroughFloat.sign := f32.sign
passThroughFloat.exponent := f32.exponent.resized
passThroughFloat.mantissa := f32.mantissa << (if (p.withDouble) 29 else 0)
recodedExpOffset := exponentF32Subnormal
}
val manZero = passThroughFloat.mantissa === 0
val expZero = passThroughFloat.exponent === 0
val expOne = passThroughFloat.exponent(7 downto 0).andR
if(p.withDouble) expOne.clearWhen(input.format === FpuFormat.DOUBLE && !passThroughFloat.exponent(11 downto 8).andR)
if(p.withDouble) {
expZero.clearWhen(input.format === FpuFormat.DOUBLE && input.value(62 downto 60) =/= 0)
expOne.clearWhen(input.format === FpuFormat.DOUBLE && input.value(62 downto 60) =/= 7)
}
val isZero = expZero && manZero
val isSubnormal = expZero && !manZero
@ -409,9 +436,10 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val ohInputWidth = 32 max p.internalMantissaSize
val ohInput = Bits(ohInputWidth bits).assignDontCare()
when(!input.i2f) {
if(!p.withDouble) ohInput(ohInputWidth-23, 23 bits) := input.value(0, 23 bits)
if(!p.withDouble) ohInput := input.value(0, 23 bits) << 9
if( p.withDouble) ohInput := passThroughFloat.mantissa.asBits
} otherwise {
ohInput(ohInputWidth-32-1 downto 0) := 0
ohInput(ohInputWidth-32, 32 bits) := input.value(31 downto 0)
}
@ -426,15 +454,15 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
}
val output = RegNextWhen(logic, !done)
}
shift.input := (input.value.asUInt |<< 1).resized
shift.input := (ohInput.asUInt |<< 1).resized
val subnormalShiftOffset = if(!p.withDouble) U(9) else ((input.format === FpuFormat.DOUBLE) ? U(0) | U(0))
val subnormalExpOffset = if(!p.withDouble) U(9) else ((input.format === FpuFormat.DOUBLE) ? U(0) | U(0))
val subnormalShiftOffset = if(!p.withDouble) U(0) else ((input.format === FpuFormat.DOUBLE) ? U(0) | U(0)) //TODO remove ?
val subnormalExpOffset = if(!p.withDouble) U(0) else ((input.format === FpuFormat.DOUBLE) ? U(0) | U(0))
when(input.valid && (input.i2f || isSubnormal) && !done){
busy := True
when(boot){
when(input.i2f && !patched && input.value.msb && input.arg(0)){
when(input.i2f && !patched && input.value(31) && input.arg(0)){
input.value.getDrivingReg(0, 32 bits) := B(input.value.asUInt.twoComplement(True).resize(32 bits))
patched := True
} otherwise {
@ -467,7 +495,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val recoded = p.internalFloating()
recoded.mantissa := passThroughFloat.mantissa
recoded.exponent := (passThroughFloat.exponent -^ fsm.expOffset + exponentF32Subnormal).resized
recoded.exponent := (passThroughFloat.exponent -^ fsm.expOffset + recodedExpOffset).resized
recoded.sign := passThroughFloat.sign
recoded.setNormal
when(isZero){recoded.setZero}
@ -480,9 +508,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
output.roundMode := input.roundMode
if(p.withDouble) {
output.format := input.format
when(!input.i2f && input.format === FpuFormat.DOUBLE && input.value(63 downto 23).andR){ //Detect boxing
output.format := FpuFormat.FLOAT
}
}
output.rd := input.rd
output.value.sign := recoded.sign
@ -523,9 +548,15 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val exp = (input.rs1.exponent - (exponentOne-1023)).resize(11 bits)
val man = CombInit(input.rs1.mantissa)
}
recodedResult := (if(p.withDouble) B"xFFFFFFFF" else B"") ## input.rs1.sign ## f32.exp ## f32.man
val expInSubnormalRange = input.rs1.exponent <= exponentOne - 127
whenDouble(input.format){
recodedResult := input.rs1.sign ## f64.exp ## f64.man
} {
recodedResult := (if(p.withDouble) B"xFFFFFFFF" else B"") ## input.rs1.sign ## f32.exp ## f32.man
}
val expSubnormalThreshold = muxDouble[UInt](input.format)(exponentF64Subnormal)(exponentF32Subnormal)
val expInSubnormalRange = input.rs1.exponent <= expSubnormalThreshold
val isSubnormal = !input.rs1.special && expInSubnormalRange
val isNormal = !input.rs1.special && !expInSubnormalRange
val fsm = new Area{
@ -552,14 +583,14 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
shift.input := (U(!isZero) @@ input.rs1.mantissa) << (if(p.withDouble) 0 else 9)
val formatShiftOffset = muxDouble[UInt](input.format)(exponentOne-1023+1)(exponentOne - (if(p.withDouble) (127+34) else (127-10)))
when(input.valid && (needRecoding || isF2i) && !done){
halt := True
when(boot){
when(isF2i){
shift.by := (U(exponentOne + 31) - input.rs1.exponent).min(U(33)).resized //TODO merge
shift.by := ((U(exponentOne + 31) - input.rs1.exponent).min(U(33)) + (if(p.withDouble) 20 else 0)).resized //TODO merge
} otherwise {
shift.by := (U(exponentOne - 127+10) - input.rs1.exponent).resized
shift.by := (formatShiftOffset - input.rs1.exponent).resized
}
boot := False
} otherwise {
@ -619,7 +650,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
when(mantissaForced){
recodedResult(0,23 bits) := (default -> mantissaForcedValue)
whenDouble(input.format){
recodedResult(52-23, 52-23 bits) := (default -> exponentForcedValue)
recodedResult(23, 52-23 bits) := (default -> mantissaForcedValue)
}{}
}
when(exponentForced){
@ -764,10 +795,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
}
val norm = new Area{
// val needShift = math.mulC.msb
// val exp = math.exp + U(needShift)
// val man = needShift ? math.mulC(p.internalMantissaSize + 1, p.internalMantissaSize bits) | math.mulC(p.internalMantissaSize, p.internalMantissaSize bits)
val (mulHigh, mulLow) = math.mulC.splitAt(p.internalMantissaSize-1)
val scrap = mulLow =/= 0
val needShift = mulHigh.msb
@ -775,7 +802,9 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val man = needShift ? mulHigh(1, p.internalMantissaSize+1 bits) | mulHigh(0, p.internalMantissaSize+1 bits)
scrap setWhen(needShift && mulHigh(0))
val forceZero = input.rs1.isZero || input.rs2.isZero
val forceUnderflow = exp < exponentOne + exponentOne - 127 - 24 // 0x6A //TODO
val underflowThreshold = muxDouble[UInt](input.format)(exponentOne + exponentOne - 1023 - 53) (exponentOne + exponentOne - 127 - 24)
val underflowExp = muxDouble[UInt](input.format)(exponentOne - 1023 - 54) (exponentOne - 127 - 25)
val forceUnderflow = exp < underflowThreshold
val forceOverflow = input.rs1.isInfinity || input.rs2.isInfinity
val infinitynan = ((input.rs1.isInfinity || input.rs2.isInfinity) && (input.rs1.isZero || input.rs2.isZero))
val forceNan = input.rs1.isNan || input.rs2.isNan || infinitynan
@ -797,7 +826,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
} elsewhen(forceZero) {
output.setZero
} elsewhen(forceUnderflow) {
output.exponent := exponentOne - 127 - 25
output.exponent := underflowExp.resized
}
}
@ -1123,11 +1152,14 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val round = new Area{
val input = merge.commited.combStage
//TODO do not break NAN payload (seems already fine)
val manAggregate = input.value.mantissa @@ input.scrap
val expDif = (exponentOne-126) -^ input.value.exponent
val expBase = muxDouble[UInt](input.format)(exponentF64Subnormal+1)(exponentF32Subnormal+1)
val expDif = expBase -^ input.value.exponent
val expSubnormal = !expDif.msb
val discardCount = expSubnormal ? expDif.resize(log2Up(p.internalMantissaSize) bits) | U(0)
var discardCount = (expSubnormal ? expDif.resize(log2Up(p.internalMantissaSize) bits) | U(0))
if(p.withDouble) when(input.format === FpuFormat.FLOAT){
discardCount \= discardCount + 29
}
val exactMask = (List(True) ++ (0 until p.internalMantissaSize+1).map(_ < discardCount)).asBits.asUInt
val roundAdjusted = (True ## (manAggregate>>1))(discardCount) ## ((manAggregate & exactMask) =/= 0)
@ -1156,10 +1188,16 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
// uf := True
// }
when(!math.special && math.exponent <= exponentOne-127 && roundAdjusted.asUInt =/= 0){ //Do not catch exact 1.17549435E-38 underflow, but, who realy care ?
val ufSubnormalThreshold = muxDouble[UInt](input.format)(exponentF64Subnormal)(exponentF32Subnormal)
val ufThreshold = muxDouble[UInt](input.format)(exponentF64Subnormal-52+1)(exponentF32Subnormal-23+1)
val ofThreshold = muxDouble[UInt](input.format)(exponentF64Infinity-1)(exponentF32Infinity-1)
when(!math.special && math.exponent <= ufSubnormalThreshold && roundAdjusted.asUInt =/= 0){ //Do not catch exact 1.17549435E-38 underflow, but, who realy care ?
uf := True
}
when(!math.special && math.exponent >= exponentOne + 128){
when(!math.special && math.exponent > ofThreshold){
nx := True
of := True
val doMax = input.roundMode.mux(
@ -1170,7 +1208,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
FpuRoundMode.RMM -> (False)
)
when(doMax){
patched.exponent := exponentOne + 127
patched.exponent := ofThreshold
patched.mantissa.setAll()
} otherwise {
patched.setInfinity
@ -1178,7 +1216,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
}
when(!math.special && math.exponent <= exponentOne - 127-23){
when(!math.special && math.exponent < ufThreshold){
nx := True
uf := True
val doMin = input.roundMode.mux(
@ -1189,7 +1227,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
FpuRoundMode.RMM -> (False)
)
when(doMin){
patched.exponent := exponentOne - 127-23+1
patched.exponent := ufThreshold.resized
patched.mantissa := 0
} otherwise {
patched.setZero

View File

@ -23,7 +23,17 @@ import scala.util.Random
class FpuTest extends FunSuite{
val b2f = lang.Float.intBitsToFloat(_)
val b2d = lang.Double.longBitsToDouble(_)
val f2b = lang.Float.floatToRawIntBits(_)
// 2^64: added to a negative signed 64-bit value to obtain its unsigned reading.
val d2bOffset = BigInt("10000000000000000",16)

/**
 * Returns the raw IEEE-754 bit pattern of `that` as a non-negative BigInt
 * (i.e. the 64 raw bits interpreted as an unsigned number).
 */
def d2b(that : Double) = {
  val rawBits = lang.Double.doubleToRawLongBits(that)
  val signedValue = BigInt(rawBits)
  // A set sign bit makes the Long negative; shift it back into the unsigned range.
  if(rawBits < 0) d2bOffset + signedValue else signedValue
}
test("f32f64") {
@ -64,24 +74,6 @@ class FpuTest extends FunSuite{
def f32_f32_f32 ={
val s = new Scanner(next)
val a,b,c = (s.nextLong(16).toInt)
// if(b2f(a).isNaN || b2f(b).isNaN){
// print("NAN => ")
// if(((a >> 23) & 0xFF) == 0xFF && ((a >> 0) & 0xEFFFFF) != 0){
// print(a.toHexString)
// print(" " + f2b(b2f(a)).toHexString)
// }
// if(((b >> 23) & 0xFF) == 0xFF && ((b >> 0) & 0xEFFFFF) != 0){
// print(b.toHexString)
// print(" " + f2b(b2f(b)).toHexString)
// }
// if(((c >> 23) & 0xFF) == 0xFF && ((c >> 0) & 0xEFFFFF) != 0){
// print(" " + c.toHexString)
// print(" " + f2b(b2f(c)).toHexString)
// }
//
// print(" " + simTime())
// println("")
// }
(b2f(a), b2f(b), b2f(c), s.nextInt(16))
}
@ -105,8 +97,39 @@ class FpuTest extends FunSuite{
val s = new Scanner(next)
val a,b = (s.nextLong(16).toInt)
(b2f(a), b2f(b), s.nextInt(16))
}
}
/**
 * Consumes the next token of `s` and parses it as an unsigned 64-bit
 * hexadecimal number; values above Long.MaxValue come back as negative Longs
 * carrying the same bit pattern.
 */
def nextLong(s : Scanner) : Long = {
  val token = s.next()
  java.lang.Long.parseUnsignedLong(token, 16)
}
/**
 * Parses the string returned by `next` as three 64-bit hex words followed by
 * a hex integer, and returns the three words decoded as doubles plus that integer.
 */
def f64_f64_f64 ={
  val scanner = new Scanner(next)
  // Tokens are consumed strictly left to right.
  val first = nextLong(scanner)
  val second = nextLong(scanner)
  val third = nextLong(scanner)
  val flags = scanner.nextInt(16)
  (b2d(first), b2d(second), b2d(third), flags)
}
/**
 * Parses the string returned by `next` as a hex integer (truncated to 32 bits),
 * a 64-bit hex word decoded as a double, and a trailing hex integer.
 */
def i32_f64 ={
  val scanner = new Scanner(next)
  val intOperand = scanner.nextLong(16).toInt
  val doubleBits = nextLong(scanner)
  val flags = scanner.nextInt(16)
  (intOperand, b2d(doubleBits), flags)
}
/**
 * Parses the string returned by `next` as a 64-bit hex word decoded as a double,
 * a hex integer (truncated to 32 bits), and a trailing hex integer.
 */
def f64_i32 = {
  val scanner = new Scanner(next)
  val doubleBits = nextLong(scanner)
  val intResult = scanner.nextLong(16).toInt
  val flags = scanner.nextInt(16)
  (b2d(doubleBits), intResult, flags)
}
/**
 * Parses the string returned by `next` as three unsigned hex words and a
 * trailing hex integer. The first two words are decoded as doubles; the third
 * is returned as the raw Long.
 */
def f64_f64_i32 = {
  val scanner = new Scanner(next)
  val firstBits = nextLong(scanner)
  val secondBits = nextLong(scanner)
  val rawThird = nextLong(scanner)
  val flags = scanner.nextInt(16)
  (b2d(firstBits), b2d(secondBits), rawThird, flags)
}
/**
 * Parses the string returned by `next` as two 64-bit hex words followed by a
 * hex integer, and returns the two words decoded as doubles plus that integer.
 */
def f64_f64 = {
  val s = new Scanner(next)
  // Scanner.nextLong(16) only accepts signed values, so any word with the top
  // bit set (every negative double) would throw InputMismatchException.
  // Use the unsigned helper, like the other f64 readers do.
  val a,b = nextLong(s)
  (b2d(a), b2d(b), s.nextInt(16))
}
}
lazy val RAW = build("")
lazy val RNE = build("-rnear_even")
@ -125,28 +148,33 @@ class FpuTest extends FunSuite{
}
}
val f32 = new {
val add = new TestCase("f32_add")
val sub = new TestCase("f32_sub")
val mul = new TestCase("f32_mul")
val ui2f = new TestCase("ui32_to_f32")
val i2f = new TestCase("i32_to_f32")
val f2ui = new TestCase("f32_to_ui32 -exact")
val f2i = new TestCase("f32_to_i32 -exact")
val eq = new TestCase("f32_eq")
val lt = new TestCase("f32_lt")
val le = new TestCase("f32_le")
val min = new TestCase("f32_le")
val max = new TestCase("f32_lt")
val transfer = new TestCase("f32_eq")
val fclass = new TestCase("f32_eq")
val sgnj = new TestCase("f32_eq")
val sgnjn = new TestCase("f32_eq")
val sgnjx = new TestCase("f32_eq")
val sqrt = new TestCase("f32_sqrt")
val div = new TestCase("f32_div")
// Groups the TestCase handles used by the tests for one floating-point format.
// `f` is the format prefix spliced into every TestCase name (instantiated just
// below with "f32" and "f64").
class TestVector(f : String) {
val add = new TestCase(s"${f}_add")
val sub = new TestCase(s"${f}_sub")
val mul = new TestCase(s"${f}_mul")
// Integer <-> float conversions: the format name sits on the float side of the name.
val ui2f = new TestCase(s"ui32_to_${f}")
val i2f = new TestCase(s"i32_to_${f}")
val f2ui = new TestCase(s"${f}_to_ui32 -exact")
val f2i = new TestCase(s"${f}_to_i32 -exact")
val eq = new TestCase(s"${f}_eq")
val lt = new TestCase(s"${f}_lt")
val le = new TestCase(s"${f}_le")
// The entries below reuse the comparison names rather than dedicated ones.
// NOTE(review): presumably they only serve as a source of operand values and the
// expected results are computed by the tests themselves - confirm against the users.
val min = new TestCase(s"${f}_le")
val max = new TestCase(s"${f}_lt")
val transfer = new TestCase(s"${f}_eq")
val fclass = new TestCase(s"${f}_eq")
val sgnj = new TestCase(s"${f}_eq")
val sgnjn = new TestCase(s"${f}_eq")
val sgnjx = new TestCase(s"${f}_eq")
val sqrt = new TestCase(s"${f}_sqrt")
val div = new TestCase(s"${f}_div")
// Operand sources for the cross-format (f32 <-> f64) transfer tests.
val f32 = new TestCase(s"${f}_eq")
val f64 = new TestCase(s"${f}_eq")
}
val f32 = new TestVector("f32")
val f64 = new TestVector("f64")
val cpus = for(id <- 0 until portCount) yield new {
val cmdQueue = mutable.Queue[FpuCmd => Unit]()
val commitQueue = mutable.Queue[FpuCommit => Unit]()
@ -165,9 +193,15 @@ class FpuTest extends FunSuite{
val patch = if(value.abs == 1.17549435E-38f) ref & ~2 else ref
flagMatch(patch, report)
}
/**
 * Double-precision variant of the flag check: compares the accumulated flags
 * against `ref`, but first clears bit 1 of `ref` when the magnitude of `value`
 * is exactly the smallest normal double. This mirrors the Float overload, which
 * applies the same patch for 1.17549435E-38f.
 * NOTE(review): bit 1 is presumably the underflow flag - confirm against the flag layout.
 *
 * @param ref    expected flag bits
 * @param value  reference result of the operation under test
 * @param report text appended to the failure message
 */
def flagMatch(ref : Int, value : Double, report : String): Unit ={
  // The literal must be a Long: `1 << 52` is an Int shift whose count is masked
  // to 5 bits (52 & 31 == 20), which would encode a subnormal instead of the
  // smallest normal double (raw bits 0x0010000000000000).
  val patch = if(value.abs == b2d(1L << 52)) ref & ~2 else ref
  flagMatch(patch, report)
}
def flagMatch(ref : Int, report : String): Unit ={
waitUntil(pendingMiaou == 0)
assert(flagAccumulator == ref, s"Flag missmatch dut=$flagAccumulator ref=$ref $report")
softAssert(flagAccumulator == ref, s"Flag missmatch dut=$flagAccumulator ref=$ref $report")
flagAccumulator = 0
}
def flagClear(): Unit ={
@ -231,6 +265,10 @@ class FpuTest extends FunSuite{
loadRaw(rd, f2b(value).toLong & 0xFFFFFFFFl, FpuFormat.FLOAT)
}
/** Loads the raw bit pattern of the double `value` into register `rd` using the DOUBLE format. */
def load(rd : Int, value : Double): Unit ={
  val rawBits = d2b(value)
  loadRaw(rd, rawBits, FpuFormat.DOUBLE)
}
def storeRaw(rs : Int, format : FpuFormat.E)(body : FpuRsp => Unit): Unit ={
cmdAdd {cmd =>
cmd.opcode #= cmd.opcode.spinalEnum.STORE
@ -250,8 +288,11 @@ class FpuTest extends FunSuite{
def storeFloat(rs : Int)(body : Float => Unit): Unit ={
storeRaw(rs, FpuFormat.FLOAT){rsp => body(b2f(rsp.value.toBigInt.toInt))}
}
/**
 * Issues a DOUBLE-format store of register `rs` and hands the response value,
 * reinterpreted as a Double, to `body`.
 */
def store(rs : Int)(body : Double => Unit): Unit ={
  storeRaw(rs, FpuFormat.DOUBLE){ rsp =>
    // toLong keeps the low 64 bits of the response, i.e. the raw double encoding.
    val rawBits = rsp.value.toBigInt.toLong
    body(b2d(rawBits))
  }
}
def fpuF2f(rd : Int, rs1 : Int, rs2 : Int, rs3 : Int, opcode : FpuOpcode.E, arg : Int, rounding : FpuRoundMode.E = FpuRoundMode.RNE): Unit ={
def fpuF2f(rd : Int, rs1 : Int, rs2 : Int, rs3 : Int, opcode : FpuOpcode.E, arg : Int, rounding : FpuRoundMode.E, format : FpuFormat.E): Unit ={
cmdAdd {cmd =>
cmd.opcode #= opcode
cmd.rs1 #= rs1
@ -260,6 +301,7 @@ class FpuTest extends FunSuite{
cmd.rd #= rd
cmd.arg #= arg
cmd.roundMode #= rounding
cmd.format #= format
}
commitQueue += {cmd =>
cmd.write #= true
@ -267,7 +309,7 @@ class FpuTest extends FunSuite{
}
}
def fpuF2i(rs1 : Int, rs2 : Int, opcode : FpuOpcode.E, arg : Int, rounding : FpuRoundMode.E = FpuRoundMode.RNE)(body : FpuRsp => Unit): Unit ={
def fpuF2i(rs1 : Int, rs2 : Int, opcode : FpuOpcode.E, arg : Int, rounding : FpuRoundMode.E, format : FpuFormat.E)(body : FpuRsp => Unit): Unit ={
cmdAdd {cmd =>
cmd.opcode #= opcode
cmd.rs1 #= rs1
@ -276,58 +318,59 @@ class FpuTest extends FunSuite{
cmd.rd.randomize()
cmd.arg #= arg
cmd.roundMode #= rounding
cmd.format #= format
}
rspQueue += body
}
def mul(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E = FpuRoundMode.RNE): Unit ={
fpuF2f(rd, rs1, rs2, Random.nextInt(32), FpuOpcode.MUL, 0, rounding)
def mul(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E, format : FpuFormat.E): Unit ={
fpuF2f(rd, rs1, rs2, Random.nextInt(32), FpuOpcode.MUL, 0, rounding, format)
}
def add(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E = FpuRoundMode.RNE): Unit ={
fpuF2f(rd, rs1, rs2, Random.nextInt(32), FpuOpcode.ADD, 0, rounding)
def add(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E = FpuRoundMode.RNE, format : FpuFormat.E): Unit ={
fpuF2f(rd, rs1, rs2, Random.nextInt(32), FpuOpcode.ADD, 0, rounding, format)
}
def sub(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E = FpuRoundMode.RNE): Unit ={
fpuF2f(rd, rs1, rs2, Random.nextInt(32), FpuOpcode.ADD, 1, rounding)
def sub(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E = FpuRoundMode.RNE, format : FpuFormat.E): Unit ={
fpuF2f(rd, rs1, rs2, Random.nextInt(32), FpuOpcode.ADD, 1, rounding, format)
}
def div(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E = FpuRoundMode.RNE): Unit ={
fpuF2f(rd, rs1, rs2, Random.nextInt(32), FpuOpcode.DIV, Random.nextInt(4), rounding)
def div(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E = FpuRoundMode.RNE, format : FpuFormat.E): Unit ={
fpuF2f(rd, rs1, rs2, Random.nextInt(32), FpuOpcode.DIV, Random.nextInt(4), rounding, format)
}
def sqrt(rd : Int, rs1 : Int, rounding : FpuRoundMode.E = FpuRoundMode.RNE): Unit ={
fpuF2f(rd, rs1, Random.nextInt(32), Random.nextInt(32), FpuOpcode.SQRT, Random.nextInt(4), rounding)
def sqrt(rd : Int, rs1 : Int, rounding : FpuRoundMode.E = FpuRoundMode.RNE, format : FpuFormat.E): Unit ={
fpuF2f(rd, rs1, Random.nextInt(32), Random.nextInt(32), FpuOpcode.SQRT, Random.nextInt(4), rounding, format)
}
def fma(rd : Int, rs1 : Int, rs2 : Int, rs3 : Int, rounding : FpuRoundMode.E = FpuRoundMode.RNE): Unit ={
fpuF2f(rd, rs1, rs2, rs3, FpuOpcode.FMA, 0, rounding)
def fma(rd : Int, rs1 : Int, rs2 : Int, rs3 : Int, rounding : FpuRoundMode.E, format : FpuFormat.E): Unit ={
fpuF2f(rd, rs1, rs2, rs3, FpuOpcode.FMA, 0, rounding, format)
}
def sgnjRaw(rd : Int, rs1 : Int, rs2 : Int, arg : Int): Unit ={
fpuF2f(rd, rs1, rs2, Random.nextInt(32), FpuOpcode.SGNJ, arg, FpuRoundMode.elements.randomPick())
def sgnjRaw(rd : Int, rs1 : Int, rs2 : Int, arg : Int, format : FpuFormat.E): Unit ={
fpuF2f(rd, rs1, rs2, Random.nextInt(32), FpuOpcode.SGNJ, arg, FpuRoundMode.elements.randomPick(), format)
}
def sgnj(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E = null): Unit ={
sgnjRaw(rd, rs1, rs2, 0)
def sgnj(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E = null, format : FpuFormat.E): Unit ={
sgnjRaw(rd, rs1, rs2, 0, format)
}
def sgnjn(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E = null): Unit ={
sgnjRaw(rd, rs1, rs2, 1)
def sgnjn(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E = null, format : FpuFormat.E): Unit ={
sgnjRaw(rd, rs1, rs2, 1, format)
}
def sgnjx(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E = null): Unit ={
sgnjRaw(rd, rs1, rs2, 2)
def sgnjx(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E = null, format : FpuFormat.E): Unit ={
sgnjRaw(rd, rs1, rs2, 2, format)
}
def cmp(rs1 : Int, rs2 : Int, arg : Int = 1)(body : FpuRsp => Unit): Unit ={
fpuF2i(rs1, rs2, FpuOpcode.CMP, arg, FpuRoundMode.elements.randomPick())(body)
def cmp(rs1 : Int, rs2 : Int, arg : Int, format : FpuFormat.E)(body : FpuRsp => Unit): Unit ={
fpuF2i(rs1, rs2, FpuOpcode.CMP, arg, FpuRoundMode.elements.randomPick(), format)(body)
}
def f2i(rs1 : Int, signed : Boolean, rounding : FpuRoundMode.E = FpuRoundMode.RNE)(body : FpuRsp => Unit): Unit ={
fpuF2i(rs1, Random.nextInt(32), FpuOpcode.F2I, if(signed) 1 else 0, rounding)(body)
def f2i(rs1 : Int, signed : Boolean, rounding : FpuRoundMode.E, format : FpuFormat.E)(body : FpuRsp => Unit): Unit ={
fpuF2i(rs1, Random.nextInt(32), FpuOpcode.F2I, if(signed) 1 else 0, rounding, format)(body)
}
def i2f(rd : Int, value : Int, signed : Boolean, rounding : FpuRoundMode.E): Unit ={
def i2f(rd : Int, value : Int, signed : Boolean, rounding : FpuRoundMode.E, format : FpuFormat.E): Unit ={
cmdAdd {cmd =>
cmd.opcode #= cmd.opcode.spinalEnum.I2F
cmd.rs1.randomize()
@ -336,6 +379,7 @@ class FpuTest extends FunSuite{
cmd.rd #= rd
cmd.arg #= (if(signed) 1 else 0)
cmd.roundMode #= rounding
cmd.format #= format
}
commitQueue += {cmd =>
cmd.write #= true
@ -451,13 +495,13 @@ class FpuTest extends FunSuite{
}
def testBinaryOp(op : (Int,Int,Int,FpuRoundMode.E) => Unit, a : Float, b : Float, ref : Float, flag : Int, rounding : FpuRoundMode.E, opName : String): Unit ={
def testBinaryOp(op : (Int,Int,Int,FpuRoundMode.E, FpuFormat.E) => Unit, a : Float, b : Float, ref : Float, flag : Int, rounding : FpuRoundMode.E, opName : String): Unit ={
val rs = new RegAllocator()
val rs1, rs2, rs3 = rs.allocate()
val rd = Random.nextInt(32)
load(rs1, a)
load(rs2, b)
op(rd,rs1,rs2, rounding)
op(rd,rs1,rs2, rounding, FpuFormat.FLOAT)
storeFloat(rd){v =>
assert(f2b(v) == f2b(ref), f"## ${a} ${opName} $b = $v, $ref $rounding")
}
@ -466,12 +510,25 @@ class FpuTest extends FunSuite{
}
def testBinaryOpF64(op : (Int,Int,Int,FpuRoundMode.E, FpuFormat.E) => Unit, a : Double, b : Double, ref : Double, flag : Int, rounding : FpuRoundMode.E, opName : String): Unit ={
val rs = new RegAllocator()
val rs1, rs2, rs3 = rs.allocate()
val rd = Random.nextInt(32)
load(rs1, a)
load(rs2, b)
op(rd,rs1,rs2, rounding, FpuFormat.DOUBLE)
store(rd){v =>
assert(d2b(v) == d2b(ref), f"## ${a} ${opName} $b = $v, $ref $rounding")
}
def testTransferRaw(a : Float, iSrc : Boolean, iDst : Boolean): Unit ={
flagMatch(flag, ref, f"## ${opName} ${a} $b $ref $rounding")
}
def testTransferF32Raw(a : Float, iSrc : Boolean, iDst : Boolean): Unit ={
val rd = Random.nextInt(32)
def handle(v : Float): Unit ={
val refUnclamped = a
val ref = a
assert(f2b(v) == f2b(ref), f"$a = $v, $ref")
}
@ -482,6 +539,49 @@ class FpuTest extends FunSuite{
flagMatch(0, f"$a")
}
/**
 * Round-trips the double `a` through a randomly chosen register (load then
 * store) and checks that the stored bit pattern is identical and that no flag
 * was raised.
 */
def testTransferF64Raw(a : Double): Unit ={
  val rd = Random.nextInt(32)
  load(rd, a)
  store(rd){ v =>
    val ref = a
    // Compare raw encodings so NaN payloads and signed zeros are checked exactly.
    assert(d2b(v) == d2b(ref), f"$a = $v, $ref")
  }
  flagMatch(0, f"$a")
}
/**
 * Writes the float `a` into a random register (through fmv_w_x when `iSrc` is
 * set, through a float load otherwise), stores it back with the DOUBLE format
 * and expects the float bits in the low word with the upper 32 bits all ones.
 */
def testTransferF32F64Raw(a : Float, iSrc : Boolean): Unit ={
  val rd = Random.nextInt(32)
  if(!iSrc) load(rd, a) else fmv_w_x(rd, f2b(a))
  storeRaw(rd, FpuFormat.DOUBLE){rsp =>
    // Expected pattern: upper word forced to ones, lower word = float encoding.
    val ref = (0xFFFFFFFFl << 32) | f2b(a)
    val v = rsp.value.toBigInt.toLong
    assert(v == ref, f"$a = $v, $ref")
  }
  flagMatch(0, f"$a")
}
/**
 * Loads the double `a` into a random register and reads it back through a
 * 32-bit path (fmv_x_w when `iDst` is set, a FLOAT-format store otherwise),
 * expecting the low 32 bits of the double's encoding and no raised flag.
 */
def testTransferF64F32Raw(a : Double, iDst : Boolean): Unit ={
  val rd = Random.nextInt(32)
  load(rd, a)
  // Both read-back paths are checked against the low word of the double encoding.
  def check(v : Long): Unit ={
    val ref = d2b(a) & 0xFFFFFFFFl
    assert(v == ref, f"$a = $v, $ref")
  }
  if(iDst) {
    fmv_x_w(rd){v_ => check(f2b(v_).toLong & 0xFFFFFFFFl)}
  } else {
    storeRaw(rd, FpuFormat.FLOAT){rsp => check(rsp.value.toBigInt.toLong & 0xFFFFFFFFl)}
  }
  flagMatch(0, f"$a")
}
def testClassRaw(a : Float) : Unit = {
val rd = Random.nextInt(32)
@ -513,7 +613,7 @@ class FpuTest extends FunSuite{
load(rs2, b)
load(rs3, c)
fma(rd,rs1,rs2,rs3)
fma(rd,rs1,rs2,rs3, FpuRoundMode.RNE, FpuFormat.FLOAT)
storeFloat(rd){v =>
val ref = a.toDouble * b.toDouble + c.toDouble
println(f"$a%.20f * $b%.20f + $c%.20f = $v%.20f, $ref%.20f")
@ -530,7 +630,7 @@ class FpuTest extends FunSuite{
load(rs1, a)
load(rs2, b)
div(rd,rs1,rs2)
div(rd,rs1,rs2, FpuRoundMode.RNE, FpuFormat.FLOAT)
storeFloat(rd){v =>
val refUnclamped = a/b
val refClamped = ((a)/(b))
@ -547,7 +647,7 @@ class FpuTest extends FunSuite{
val rd = Random.nextInt(32)
load(rs1, a)
sqrt(rd,rs1)
sqrt(rd,rs1, FpuRoundMode.RNE, FpuFormat.FLOAT)
storeFloat(rd){v =>
val ref = Math.sqrt(a).toFloat
val error = Math.abs(ref-v)/ref
@ -564,7 +664,7 @@ class FpuTest extends FunSuite{
val rd = Random.nextInt(32)
load(rs1, a)
sqrt(rd,rs1)
sqrt(rd,rs1, FpuRoundMode.RNE, FpuFormat.FLOAT)
storeFloat(rd){v =>
val error = Math.abs(ref-v)/ref
println(f"sqrt($a) = $v, $ref $error $rounding")
@ -579,7 +679,7 @@ class FpuTest extends FunSuite{
load(rs1, a)
load(rs2, b)
div(rd,rs1, rs2)
div(rd,rs1, rs2, FpuRoundMode.RNE, FpuFormat.FLOAT)
storeFloat(rd){v =>
val error = Math.abs(ref-v)/ref
println(f"div($a, $b) = $v, $ref $error $rounding")
@ -594,16 +694,16 @@ class FpuTest extends FunSuite{
val rs1 = rs.allocate()
val rd = Random.nextInt(32)
load(rs1, a)
f2i(rs1, signed, rounding){rsp =>
f2i(rs1, signed, rounding, FpuFormat.FLOAT){rsp =>
if(signed) {
val v = rsp.value.toLong.toInt
val v = rsp.value.toBigInt.toInt
var ref2 = ref
if(a >= Int.MaxValue) ref2 = Int.MaxValue
if(a <= Int.MinValue) ref2 = Int.MinValue
if(a.isNaN) ref2 = Int.MaxValue
assert(v == (ref2), f" <= f2i($a) = $v, $ref2, $rounding, $flag")
} else {
val v = rsp.value.toLong
val v = rsp.value.toBigInt.toLong & 0xFFFFFFFFl
var ref2 = ref.toLong & 0xFFFFFFFFl
if(a < 0) ref2 = 0
if(a >= 0xFFFFFFFFl) ref2 = 0xFFFFFFFFl
@ -621,15 +721,15 @@ class FpuTest extends FunSuite{
def testI2fExact(a : Int, b : Float, f : Int, signed : Boolean, rounding : FpuRoundMode.E): Unit ={
val rs = new RegAllocator()
val rd = Random.nextInt(32)
i2f(rd, a, signed, rounding)
i2f(rd, a, signed, rounding, FpuFormat.FLOAT)
storeFloat(rd){v =>
val aLong = if(signed) a.toLong else a.toLong & 0xFFFFFFFFl
val ref = b
assert(f2b(v) == f2b(ref), f"i2f($aLong) = $v, $ref")
assert(f2b(v) == f2b(ref), f"i2f($aLong) = $v, $ref $rounding")
}
flagMatch(f, b, f"i2f() = $b")
flagMatch(f, b, f"i2f($a) = $b")
}
@ -640,7 +740,7 @@ class FpuTest extends FunSuite{
val rd = Random.nextInt(32)
load(rs1, a)
load(rs2, b)
cmp(rs1, rs2, arg){rsp =>
cmp(rs1, rs2, arg, FpuFormat.FLOAT){rsp =>
val v = rsp.value.toLong
assert(v === ref, f"cmp($a, $b, $arg) = $v, $ref")
}
@ -744,29 +844,6 @@ class FpuTest extends FunSuite{
}
}
// for(i <- 0 until 64){
// val rounding = FpuRoundMode.RMM
// val a = 24f
// val b = b2f(0x3f800000+i)
// val c = Clib.math.mulF32(a, b, rounding.position)
// val f = 0
// testMulExact(a,b,c,f, rounding)
// }
val binaryOps = List[(Int,Int,Int,FpuRoundMode.E) => Unit](add, sub, mul)
// testSqrt(0.0f)
// testSqrt(1.2f)
// for(a <- fAll) testSqrt(a)
// for(_ <- 0 until 1000) testSqrt(randomFloat())
def testFma() : Unit = {
testFmaRaw(randomFloat(), randomFloat(), randomFloat())
flagClear()
@ -786,13 +863,13 @@ class FpuTest extends FunSuite{
testEqRaw(a,b,i, f)
}
def testF2ui() : Unit = {
def testF2uiF32() : Unit = {
val rounding = FpuRoundMode.elements.randomPick()
val (a,b,f) = f32.f2ui(rounding).f32_i32
testF2iExact(a,b, f, false, rounding)
}
def testF2i() : Unit = {
def testF2iF32() : Unit = {
val rounding = FpuRoundMode.elements.randomPick()
val (a,b,f) = f32.f2i(rounding).f32_i32
testF2iExact(a,b, f, true, rounding)
@ -823,11 +900,26 @@ class FpuTest extends FunSuite{
testSgnjxRaw(a, b)
}
def testTransfer() : Unit = {
def testTransferF32() : Unit = {
val (a,b,r,f) = f32.transfer.RAW.f32_f32_i32
testTransferRaw(a, Random.nextBoolean(), Random.nextBoolean())
testTransferF32Raw(a, Random.nextBoolean(), Random.nextBoolean())
}
/** Runs one f64 load/store round-trip using the first operand of the next f64 transfer vector. */
def testTransferF64() : Unit = {
  val operand = f64.transfer.RAW.f64_f64_i32._1
  testTransferF64Raw(operand)
}
/**
 * Runs one f64 -> 32-bit read-back check using the first operand of the next
 * `f64.f32` vector; the read-back path is chosen at random.
 */
def testTransferF64F32() : Unit = {
  val operand = f64.f32.RAW.f64_f64_i32._1
  val useFmv = Random.nextBoolean()
  testTransferF64F32Raw(operand, useFmv)
}
/**
 * Runs one f32 -> DOUBLE-format store check using the first operand of the next
 * `f32.f64` vector; the write path is chosen at random.
 */
def testTransferF32F64() : Unit = {
  val operand = f32.f64.RAW.f32_f32_i32._1
  val useFmv = Random.nextBoolean()
  testTransferF32F64Raw(operand, useFmv)
}
def testClass() : Unit = {
val (a,b,r,f) = f32.fclass.RAW.f32_f32_i32
testClassRaw(a)
@ -854,59 +946,112 @@ class FpuTest extends FunSuite{
testI2fExact(a,b,f, false, rounding)
}
def testMul() : Unit = {
def testMulF32() : Unit = {
val rounding = FpuRoundMode.elements.randomPick()
val (a,b,c,f) = f32.mul(rounding).f32_f32_f32
testBinaryOp(mul,a,b,c,f, rounding,"mul")
}
def testAdd() : Unit = {
def testAddF32() : Unit = {
val rounding = FpuRoundMode.elements.randomPick()
val (a,b,c,f) = f32.add(rounding).f32_f32_f32
testBinaryOp(add,a,b,c,f, rounding,"add")
}
def testSub() : Unit = {
def testSubF32() : Unit = {
val rounding = FpuRoundMode.elements.randomPick()
val (a,b,c,f) = f32.sub(rounding).f32_f32_f32
testBinaryOp(sub,a,b,c,f, rounding,"sub")
}
/**
 * Picks a random rounding mode, fetches the next f64 multiplication vector for
 * that mode and checks the multiplier against it.
 */
def testMulF64() : Unit = {
  val mode = FpuRoundMode.elements.randomPick()
  val (lhs, rhs, expected, flags) = f64.mul(mode).f64_f64_f64
  testBinaryOpF64(mul, lhs, rhs, expected, flags, mode, "mul")
}
val f32Tests = List[() => Unit](testSub, testAdd, testMul, testI2f, testUI2f, testMin, testMax, testSgnj, testTransfer, testDiv, testSqrt, testF2i, testF2ui, testLe, testEq, testLt, testClass, testFma)
val f32Tests = List[() => Unit](testSubF32, testAddF32, testMulF32, testI2f, testUI2f, testMin, testMax, testSgnj, testTransferF32, testDiv, testSqrt, testF2iF32, testF2uiF32, testLe, testEq, testLt, testClass, testFma)
//TODO test boxing
if(p.withDouble) {
// for(_ <- 0 until 10000) testUI2f64()
// for(_ <- 0 until 10000) testI2f64()
// println("f64 i2f done")
//
// for(_ <- 0 until 10000) testF2uiF64()
// for(_ <- 0 until 10000) testF2iF64()
// println("f64 f2i done")
// testF2iExact(1.0f,1, 0, false, FpuRoundMode.RTZ)
// testF2iExact(2.0f,2, 0, false, FpuRoundMode.RTZ)
// testF2iExact(2.5f,2, 1, false, FpuRoundMode.RTZ)
testBinaryOpF64(mul,1.0, 1.0, 1.0,0 , FpuRoundMode.RNE,"mul")
testBinaryOpF64(mul,1.0, 2.0, 2.0,0 , FpuRoundMode.RNE,"mul")
testBinaryOpF64(mul,2.5, 2.0, 5.0,0 , FpuRoundMode.RNE,"mul")
testTransferRaw(1.0f, false, false)
testTransferRaw(2.0f, false, false)
testTransferRaw(2.5f, false, false)
testTransferRaw(6.97949770801e-39f, false, false)
testTransferRaw(8.72437213501e-40f, false, false)
testTransferRaw(5.6E-45f, false, false)
for(_ <- 0 until 10000) testMulF64()
println("f64 Mul done")
testTransferF64Raw(1.0)
testTransferF64Raw(2.0)
testTransferF64Raw(2.5)
testTransferF64Raw(6.97949770801e-39)
testTransferF64Raw(8.72437213501e-40)
testTransferF64Raw(5.6E-45)
testTransferF32F64Raw(b2f(0xFFFF1234), false)
testTransferF64F32Raw(b2d(0xFFF123498765463l << 4), false)
testTransferF32F64Raw(b2f(0xFFFF1234), true)
testTransferF64F32Raw(b2d(0xFFF123498765463l << 4), true)
for (_ <- 0 until 10000) testTransferF64()
println("f64 load/store/rf transfer done")
for (_ <- 0 until 10000) testTransferF64F32()
println("f64 -> f32 load/store/rf transfer done")
for(_ <- 0 until 10000) testTransfer()
for (_ <- 0 until 10000) testTransferF32F64()
println("f32 -> f64 load/store/rf transfer done")
}
for(_ <- 0 until 10000) testTransferF32()
println("f32 load/store/rf transfer done")
for(_ <- 0 until 10000) testF2ui()
for(_ <- 0 until 10000) testF2i()
println("f2i done")
for(_ <- 0 until 10000) testMulF32()
println("Mul done")
for(_ <- 0 until 10000) testUI2f()
for(_ <- 0 until 10000) testI2f()
println("i2f done")
testF2iExact(1.0f,1, 0, false, FpuRoundMode.RTZ)
testF2iExact(2.0f,2, 0, false, FpuRoundMode.RTZ)
testF2iExact(2.5f,2, 1, false, FpuRoundMode.RTZ)
for(_ <- 0 until 10000) testF2uiF32()
for(_ <- 0 until 10000) testF2iF32()
println("f2i done")
// waitUntil(cmdQueue.isEmpty)
// dut.clockDomain.waitSampling(1000)
// simSuccess()
for(i <- 0 until 1000) testFma()
flagClear()
println("fma done") //TODO
@ -959,14 +1104,11 @@ class FpuTest extends FunSuite{
for(_ <- 0 until 10000) testMul()
println("Mul done")
for(_ <- 0 until 10000) testAdd()
for(_ <- 0 until 10000) testSub()
for(_ <- 0 until 10000) testAddF32()
for(_ <- 0 until 10000) testSubF32()
println("Add done")