fpu f64 wip

This commit is contained in:
Dolu1990 2021-02-10 13:20:17 +01:00
parent 889cc5fde2
commit 88dffc21f7
4 changed files with 93 additions and 55 deletions

View File

@ -218,7 +218,6 @@ object TestsWorkspace {
config.plugins += new FpuPlugin(
externalFpu = false,
p = FpuParameter(
internalMantissaSize = 23,
withDouble = false
)
)

View File

@ -30,6 +30,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val rd = p.rfAddress()
val arg = p.Arg()
val roundMode = FpuRoundMode()
val format = p.withDouble generate FpuFormat()
}
case class RfReadOutput() extends Bundle{
@ -40,6 +41,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val rd = p.rfAddress()
val arg = p.Arg()
val roundMode = FpuRoundMode()
val format = p.withDouble generate FpuFormat()
}
@ -50,6 +52,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val i2f = Bool()
val arg = Bits(2 bits)
val roundMode = FpuRoundMode()
val format = p.withDouble generate FpuFormat()
}
case class ShortPipInput() extends Bundle{
@ -61,6 +64,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val value = Bits(32 bits)
val arg = Bits(2 bits)
val roundMode = FpuRoundMode()
val format = p.withDouble generate FpuFormat()
}
case class MulInput() extends Bundle{
@ -72,6 +76,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val divSqrt = Bool()
val msb1, msb2 = Bool() //allow usage of msb bits of mul
val roundMode = FpuRoundMode()
val format = p.withDouble generate FpuFormat()
}
@ -82,6 +87,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val lockId = lockIdType()
val div = Bool()
val roundMode = FpuRoundMode()
val format = p.withDouble generate FpuFormat()
}
@ -91,6 +97,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val rd = p.rfAddress()
val lockId = lockIdType()
val roundMode = FpuRoundMode()
val format = p.withDouble generate FpuFormat()
}
@ -101,6 +108,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val value = p.writeFloating()
val scrap = Bool()
val roundMode = FpuRoundMode()
val format = p.withDouble generate FpuFormat()
}
case class RoundOutput() extends Bundle{
@ -111,7 +119,11 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
}
val rf = new Area{
val ram = Mem(p.internalFloating, 32*portCount)
// Payload type of one register-file word (used as the element type of `Mem(Entry(), 32*portCount)`).
case class Entry() extends Bundle{
// Floating point value, in the internal format described by p.internalFloating.
val value = p.internalFloating()
// Only generated when p.withDouble is set.
// NOTE(review): presumably flags that the entry holds a single precision value - TODO confirm,
// the write side of this field is not implemented yet in the visible code.
val f32 = p.withDouble generate Bool()
}
val ram = Mem(Entry(), 32*portCount)
val lock = for(i <- 0 until rfLockCount) yield new Area{
val valid = RegInit(False)
val source = Reg(Source())
@ -219,15 +231,19 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val s1 = s0.haltWhen(hazard || !rf.lockFree).m2sPipe()
val output = s1.swapPayload(RfReadOutput())
val s1LockId = RegNextWhen(OHToUInt(rf.lockFreeId), !output.isStall)
val rs1Entry = rf.ram.readSync(s0.source @@ s0.rs1,enable = !output.isStall)
val rs2Entry = rf.ram.readSync(s0.source @@ s0.rs2,enable = !output.isStall)
val rs3Entry = rf.ram.readSync(s0.source @@ s0.rs3,enable = !output.isStall)
output.source := s1.source
output.opcode := s1.opcode
output.lockId := s1LockId
output.arg := s1.arg
output.roundMode := s1.roundMode
output.rd := s1.rd
output.rs1 := rf.ram.readSync(s0.source @@ s0.rs1,enable = !output.isStall)
output.rs2 := rf.ram.readSync(s0.source @@ s0.rs2,enable = !output.isStall)
output.rs3 := rf.ram.readSync(s0.source @@ s0.rs3,enable = !output.isStall)
if(p.withDouble) output.format := s1.format
output.rs1 := rs1Entry.value
output.rs2 := rs2Entry.value
output.rs3 := rs3Entry.value
}
val decode = new Area{
@ -249,17 +265,20 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val divSqrtHit = input.opcode === p.Opcode.DIV || input.opcode === p.Opcode.SQRT
val divSqrt = Stream(DivSqrtInput())
input.ready setWhen(divSqrtHit && divSqrt.ready)
if(p.withDivSqrt) {
input.ready setWhen (divSqrtHit && divSqrt.ready)
divSqrt.valid := input.valid && divSqrtHit
divSqrt.payload.assignSomeByName(read.output.payload)
divSqrt.div := input.opcode === p.Opcode.DIV
}
val fmaHit = input.opcode === p.Opcode.FMA
val mulHit = input.opcode === p.Opcode.MUL || fmaHit
val mul = Stream(MulInput())
val divSqrtToMul = Stream(MulInput())
input.ready setWhen(mulHit && mul.ready && !divSqrtToMul.valid)
if(p.withMul) {
input.ready setWhen (mulHit && mul.ready && !divSqrtToMul.valid)
mul.valid := input.valid && mulHit || divSqrtToMul.valid
divSqrtToMul.ready := mul.ready
@ -270,23 +289,29 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
mul.divSqrt := False
mul.msb1 := True
mul.msb2 := True
mul.rs2.sign.allowOverride(); mul.rs2.sign := read.output.rs2.sign ^ input.arg(0)
mul.rs3.sign.allowOverride(); mul.rs3.sign := read.output.rs3.sign ^ input.arg(1)
mul.rs2.sign.allowOverride();
mul.rs2.sign := read.output.rs2.sign ^ input.arg(0)
mul.rs3.sign.allowOverride();
mul.rs3.sign := read.output.rs3.sign ^ input.arg(1)
}
}
val addHit = input.opcode === p.Opcode.ADD
val add = Stream(AddInput())
val mulToAdd = Stream(AddInput())
input.ready setWhen(addHit && add.ready && !mulToAdd.valid)
add.valid := input.valid && addHit || mulToAdd.valid
if(p.withAdd) {
input.ready setWhen (addHit && add.ready && !mulToAdd.valid)
add.valid := input.valid && addHit || mulToAdd.valid
mulToAdd.ready := add.ready
add.payload := mulToAdd.payload
when(!mulToAdd.valid) {
add.payload.assignSomeByName(read.output.payload)
add.rs2.sign.allowOverride; add.rs2.sign := read.output.rs2.sign ^ input.arg(0)
add.rs2.sign.allowOverride;
add.rs2.sign := read.output.rs2.sign ^ input.arg(0)
}
}
}
@ -629,7 +654,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
}
}
val mul = new Area{
val mul = p.withMul generate new Area{
val input = decode.mul.stage()
val math = new Area {
@ -707,7 +732,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
input.ready := (input.add ? decode.mulToAdd.ready | output.ready) || input.divSqrt
}
val divSqrt = new Area {
val divSqrt = p.withDivSqrt generate new Area {
val input = decode.divSqrt.stage()
val aproxWidth = 8
@ -889,7 +914,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
}
}
val add = new Area{
val add = p.withAdd generate new Area{
val input = decode.add.stage()
val shifter = new Area {
@ -982,7 +1007,12 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
// Merge stage: arbitrates the result streams produced by the load path and the execution units
// into a single stream, then releases a result only once its register-file lock is flagged as commited.
val merge = new Area {
//TODO maybe load can bypass merge and round.
val arbitrated = StreamArbiterFactory.lowerFirst.noLock.on(List(load.s1.output, add.output, mul.output, shortPip.rfOutput))
// Streams to arbitrate. The load output is always present; the add / mul / shortPip outputs are
// appended only when the matching FpuParameter flag (withAdd / withMul / withShortPipMisc) is set.
val inputs = ArrayBuffer[Stream[MergeInput]]()
inputs += load.s1.output
if(p.withAdd) (inputs += add.output)
if(p.withMul) (inputs += mul.output)
if(p.withShortPipMisc) (inputs += shortPip.rfOutput)
val arbitrated = StreamArbiterFactory.lowerFirst.noLock.on(inputs)
// Commit flag of the register-file lock selected by the arbitrated result's lockId.
val isCommited = rf.lock.map(_.commited).read(arbitrated.lockId)
// Hold the arbitrated stream while its lock is not commited, then expose it as a Flow.
val commited = arbitrated.haltWhen(!isCommited).toFlow
}
@ -1094,22 +1124,23 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val port = rf.ram.writePort
port.valid := input.valid && rf.lock.map(_.write).read(input.lockId)
port.address := input.source @@ input.rd
port.data := input.value
port.data.value := input.value
if(p.withDouble) port.data.f32 := ???
val randomSim = p.sim generate (in UInt(p.internalMantissaSize bits))
if(p.sim) when(port.data.isZero || port.data.isInfinity){
port.data.mantissa := randomSim
if(p.sim) when(port.data.value.isZero || port.data.value.isInfinity){
port.data.value.mantissa := randomSim
}
if(p.sim) when(input.value.special){
port.data.exponent(p.internalExponentSize-1 downto 3) := randomSim.resized
port.data.value.exponent(p.internalExponentSize-1 downto 3) := randomSim.resized
when(!input.value.isNan){
port.data.exponent(2 downto 2) := randomSim.resized
port.data.value.exponent(2 downto 2) := randomSim.resized
}
}
when(port.valid){
assert(!(port.data.exponent === 0 && !port.data.special), "Special violation")
assert(!(port.data.exponent === port.data.exponent.maxValue && !port.data.special), "Special violation")
assert(!(port.data.value.exponent === 0 && !port.data.value.special), "Special violation")
assert(!(port.data.value.exponent === port.data.value.exponent.maxValue && !port.data.value.special), "Special violation")
}
}
}
@ -1191,7 +1222,6 @@ object FpuSynthesisBench extends App{
"32",
portCount = 1,
FpuParameter(
internalMantissaSize = 23,
withDouble = false
)
)

View File

@ -108,10 +108,14 @@ object FpuRoundModeInstr extends SpinalEnum(){
}
case class FpuParameter( internalMantissaSize : Int,
withDouble : Boolean,
sim : Boolean = false){
case class FpuParameter( withDouble : Boolean,
sim : Boolean = false,
withAdd : Boolean = true,
withMul : Boolean = true,
withDivSqrt : Boolean = true,
withShortPipMisc : Boolean = true){
val internalMantissaSize = if(withDouble) 52 else 23
val storeLoadType = HardType(Bits(if(withDouble) 64 bits else 32 bits))
val internalExponentSize = (if(withDouble) 11 else 8) + 1
val internalFloating = HardType(FpuFloat(exponentSize = internalExponentSize, mantissaSize = internalMantissaSize))

View File

@ -32,8 +32,11 @@ class FpuTest extends FunSuite{
test("directed"){
val portCount = 1
val p = FpuParameter(
internalMantissaSize = 23,
withDouble = false,
// withAdd = false,
// withMul = false,
// withDivSqrt = false,
// withShortPipMisc = true
sim = true
)
@ -866,9 +869,21 @@ class FpuTest extends FunSuite{
for(_ <- 0 until 10000) testTransfer()
println("f32 load/store/rf transfer done")
for(_ <- 0 until 10000) testF2ui()
for(_ <- 0 until 10000) testF2i()
println("f2i done")
for(_ <- 0 until 10000) testUI2f()
for(_ <- 0 until 10000) testI2f()
println("i2f done")
// waitUntil(cmdQueue.isEmpty)
// dut.clockDomain.waitSampling(1000)
// simSuccess()
for(i <- 0 until 1000) testFma()
@ -886,10 +901,6 @@ class FpuTest extends FunSuite{
for(_ <- 0 until 10000) testEq()
println("Cmp done")
for(_ <- 0 until 10000) testF2ui()
for(_ <- 0 until 10000) testF2i()
println("f2i done")
for(_ <- 0 until 10000) testDiv()
println("f32 div done")
@ -900,9 +911,6 @@ class FpuTest extends FunSuite{
for(_ <- 0 until 10000) testSgnj()
println("f32 sgnj done")
for(_ <- 0 until 10000) testTransfer()
println("f32 load/store/rf transfer done")
for(_ <- 0 until 10000) testClass()
println("f32 class done")
@ -913,9 +921,6 @@ class FpuTest extends FunSuite{
println("minMax done")
for(_ <- 0 until 10000) testUI2f()
for(_ <- 0 until 10000) testI2f()
println("i2f done")
testBinaryOp(mul,1.469368E-39f, 7.9999995f, 1.17549435E-38f,3, FpuRoundMode.RUP,"mul")
testBinaryOp(mul,1.1753509E-38f, 1.0001221f, 1.17549435E-38f ,1, FpuRoundMode.RUP,"mul")