fpu f64 wip
This commit is contained in:
parent
889cc5fde2
commit
88dffc21f7
|
@ -218,7 +218,6 @@ object TestsWorkspace {
|
||||||
config.plugins += new FpuPlugin(
|
config.plugins += new FpuPlugin(
|
||||||
externalFpu = false,
|
externalFpu = false,
|
||||||
p = FpuParameter(
|
p = FpuParameter(
|
||||||
internalMantissaSize = 23,
|
|
||||||
withDouble = false
|
withDouble = false
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
|
@ -30,6 +30,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val rd = p.rfAddress()
|
val rd = p.rfAddress()
|
||||||
val arg = p.Arg()
|
val arg = p.Arg()
|
||||||
val roundMode = FpuRoundMode()
|
val roundMode = FpuRoundMode()
|
||||||
|
val format = p.withDouble generate FpuFormat()
|
||||||
}
|
}
|
||||||
|
|
||||||
case class RfReadOutput() extends Bundle{
|
case class RfReadOutput() extends Bundle{
|
||||||
|
@ -40,6 +41,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val rd = p.rfAddress()
|
val rd = p.rfAddress()
|
||||||
val arg = p.Arg()
|
val arg = p.Arg()
|
||||||
val roundMode = FpuRoundMode()
|
val roundMode = FpuRoundMode()
|
||||||
|
val format = p.withDouble generate FpuFormat()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -50,6 +52,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val i2f = Bool()
|
val i2f = Bool()
|
||||||
val arg = Bits(2 bits)
|
val arg = Bits(2 bits)
|
||||||
val roundMode = FpuRoundMode()
|
val roundMode = FpuRoundMode()
|
||||||
|
val format = p.withDouble generate FpuFormat()
|
||||||
}
|
}
|
||||||
|
|
||||||
case class ShortPipInput() extends Bundle{
|
case class ShortPipInput() extends Bundle{
|
||||||
|
@ -61,6 +64,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val value = Bits(32 bits)
|
val value = Bits(32 bits)
|
||||||
val arg = Bits(2 bits)
|
val arg = Bits(2 bits)
|
||||||
val roundMode = FpuRoundMode()
|
val roundMode = FpuRoundMode()
|
||||||
|
val format = p.withDouble generate FpuFormat()
|
||||||
}
|
}
|
||||||
|
|
||||||
case class MulInput() extends Bundle{
|
case class MulInput() extends Bundle{
|
||||||
|
@ -72,6 +76,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val divSqrt = Bool()
|
val divSqrt = Bool()
|
||||||
val msb1, msb2 = Bool() //allow usage of msb bits of mul
|
val msb1, msb2 = Bool() //allow usage of msb bits of mul
|
||||||
val roundMode = FpuRoundMode()
|
val roundMode = FpuRoundMode()
|
||||||
|
val format = p.withDouble generate FpuFormat()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -82,6 +87,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val lockId = lockIdType()
|
val lockId = lockIdType()
|
||||||
val div = Bool()
|
val div = Bool()
|
||||||
val roundMode = FpuRoundMode()
|
val roundMode = FpuRoundMode()
|
||||||
|
val format = p.withDouble generate FpuFormat()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -91,6 +97,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val rd = p.rfAddress()
|
val rd = p.rfAddress()
|
||||||
val lockId = lockIdType()
|
val lockId = lockIdType()
|
||||||
val roundMode = FpuRoundMode()
|
val roundMode = FpuRoundMode()
|
||||||
|
val format = p.withDouble generate FpuFormat()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -101,6 +108,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val value = p.writeFloating()
|
val value = p.writeFloating()
|
||||||
val scrap = Bool()
|
val scrap = Bool()
|
||||||
val roundMode = FpuRoundMode()
|
val roundMode = FpuRoundMode()
|
||||||
|
val format = p.withDouble generate FpuFormat()
|
||||||
}
|
}
|
||||||
|
|
||||||
case class RoundOutput() extends Bundle{
|
case class RoundOutput() extends Bundle{
|
||||||
|
@ -111,7 +119,11 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
}
|
}
|
||||||
|
|
||||||
val rf = new Area{
|
val rf = new Area{
|
||||||
val ram = Mem(p.internalFloating, 32*portCount)
|
case class Entry() extends Bundle{
|
||||||
|
val value = p.internalFloating()
|
||||||
|
val f32 = p.withDouble generate Bool()
|
||||||
|
}
|
||||||
|
val ram = Mem(Entry(), 32*portCount)
|
||||||
val lock = for(i <- 0 until rfLockCount) yield new Area{
|
val lock = for(i <- 0 until rfLockCount) yield new Area{
|
||||||
val valid = RegInit(False)
|
val valid = RegInit(False)
|
||||||
val source = Reg(Source())
|
val source = Reg(Source())
|
||||||
|
@ -219,15 +231,19 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val s1 = s0.haltWhen(hazard || !rf.lockFree).m2sPipe()
|
val s1 = s0.haltWhen(hazard || !rf.lockFree).m2sPipe()
|
||||||
val output = s1.swapPayload(RfReadOutput())
|
val output = s1.swapPayload(RfReadOutput())
|
||||||
val s1LockId = RegNextWhen(OHToUInt(rf.lockFreeId), !output.isStall)
|
val s1LockId = RegNextWhen(OHToUInt(rf.lockFreeId), !output.isStall)
|
||||||
|
val rs1Entry = rf.ram.readSync(s0.source @@ s0.rs1,enable = !output.isStall)
|
||||||
|
val rs2Entry = rf.ram.readSync(s0.source @@ s0.rs2,enable = !output.isStall)
|
||||||
|
val rs3Entry = rf.ram.readSync(s0.source @@ s0.rs3,enable = !output.isStall)
|
||||||
output.source := s1.source
|
output.source := s1.source
|
||||||
output.opcode := s1.opcode
|
output.opcode := s1.opcode
|
||||||
output.lockId := s1LockId
|
output.lockId := s1LockId
|
||||||
output.arg := s1.arg
|
output.arg := s1.arg
|
||||||
output.roundMode := s1.roundMode
|
output.roundMode := s1.roundMode
|
||||||
output.rd := s1.rd
|
output.rd := s1.rd
|
||||||
output.rs1 := rf.ram.readSync(s0.source @@ s0.rs1,enable = !output.isStall)
|
if(p.withDouble) output.format := s1.format
|
||||||
output.rs2 := rf.ram.readSync(s0.source @@ s0.rs2,enable = !output.isStall)
|
output.rs1 := rs1Entry.value
|
||||||
output.rs3 := rf.ram.readSync(s0.source @@ s0.rs3,enable = !output.isStall)
|
output.rs2 := rs2Entry.value
|
||||||
|
output.rs3 := rs3Entry.value
|
||||||
}
|
}
|
||||||
|
|
||||||
val decode = new Area{
|
val decode = new Area{
|
||||||
|
@ -249,16 +265,19 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
|
|
||||||
val divSqrtHit = input.opcode === p.Opcode.DIV || input.opcode === p.Opcode.SQRT
|
val divSqrtHit = input.opcode === p.Opcode.DIV || input.opcode === p.Opcode.SQRT
|
||||||
val divSqrt = Stream(DivSqrtInput())
|
val divSqrt = Stream(DivSqrtInput())
|
||||||
|
if(p.withDivSqrt) {
|
||||||
input.ready setWhen (divSqrtHit && divSqrt.ready)
|
input.ready setWhen (divSqrtHit && divSqrt.ready)
|
||||||
divSqrt.valid := input.valid && divSqrtHit
|
divSqrt.valid := input.valid && divSqrtHit
|
||||||
divSqrt.payload.assignSomeByName(read.output.payload)
|
divSqrt.payload.assignSomeByName(read.output.payload)
|
||||||
divSqrt.div := input.opcode === p.Opcode.DIV
|
divSqrt.div := input.opcode === p.Opcode.DIV
|
||||||
|
}
|
||||||
|
|
||||||
val fmaHit = input.opcode === p.Opcode.FMA
|
val fmaHit = input.opcode === p.Opcode.FMA
|
||||||
val mulHit = input.opcode === p.Opcode.MUL || fmaHit
|
val mulHit = input.opcode === p.Opcode.MUL || fmaHit
|
||||||
val mul = Stream(MulInput())
|
val mul = Stream(MulInput())
|
||||||
val divSqrtToMul = Stream(MulInput())
|
val divSqrtToMul = Stream(MulInput())
|
||||||
|
|
||||||
|
if(p.withMul) {
|
||||||
input.ready setWhen (mulHit && mul.ready && !divSqrtToMul.valid)
|
input.ready setWhen (mulHit && mul.ready && !divSqrtToMul.valid)
|
||||||
mul.valid := input.valid && mulHit || divSqrtToMul.valid
|
mul.valid := input.valid && mulHit || divSqrtToMul.valid
|
||||||
|
|
||||||
|
@ -270,23 +289,29 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
mul.divSqrt := False
|
mul.divSqrt := False
|
||||||
mul.msb1 := True
|
mul.msb1 := True
|
||||||
mul.msb2 := True
|
mul.msb2 := True
|
||||||
mul.rs2.sign.allowOverride(); mul.rs2.sign := read.output.rs2.sign ^ input.arg(0)
|
mul.rs2.sign.allowOverride();
|
||||||
mul.rs3.sign.allowOverride(); mul.rs3.sign := read.output.rs3.sign ^ input.arg(1)
|
mul.rs2.sign := read.output.rs2.sign ^ input.arg(0)
|
||||||
|
mul.rs3.sign.allowOverride();
|
||||||
|
mul.rs3.sign := read.output.rs3.sign ^ input.arg(1)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
val addHit = input.opcode === p.Opcode.ADD
|
val addHit = input.opcode === p.Opcode.ADD
|
||||||
val add = Stream(AddInput())
|
val add = Stream(AddInput())
|
||||||
val mulToAdd = Stream(AddInput())
|
val mulToAdd = Stream(AddInput())
|
||||||
|
|
||||||
|
|
||||||
|
if(p.withAdd) {
|
||||||
input.ready setWhen (addHit && add.ready && !mulToAdd.valid)
|
input.ready setWhen (addHit && add.ready && !mulToAdd.valid)
|
||||||
add.valid := input.valid && addHit || mulToAdd.valid
|
add.valid := input.valid && addHit || mulToAdd.valid
|
||||||
|
|
||||||
|
|
||||||
mulToAdd.ready := add.ready
|
mulToAdd.ready := add.ready
|
||||||
add.payload := mulToAdd.payload
|
add.payload := mulToAdd.payload
|
||||||
when(!mulToAdd.valid) {
|
when(!mulToAdd.valid) {
|
||||||
add.payload.assignSomeByName(read.output.payload)
|
add.payload.assignSomeByName(read.output.payload)
|
||||||
add.rs2.sign.allowOverride; add.rs2.sign := read.output.rs2.sign ^ input.arg(0)
|
add.rs2.sign.allowOverride;
|
||||||
|
add.rs2.sign := read.output.rs2.sign ^ input.arg(0)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -629,7 +654,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
val mul = new Area{
|
val mul = p.withMul generate new Area{
|
||||||
val input = decode.mul.stage()
|
val input = decode.mul.stage()
|
||||||
|
|
||||||
val math = new Area {
|
val math = new Area {
|
||||||
|
@ -707,7 +732,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
input.ready := (input.add ? decode.mulToAdd.ready | output.ready) || input.divSqrt
|
input.ready := (input.add ? decode.mulToAdd.ready | output.ready) || input.divSqrt
|
||||||
}
|
}
|
||||||
|
|
||||||
val divSqrt = new Area {
|
val divSqrt = p.withDivSqrt generate new Area {
|
||||||
val input = decode.divSqrt.stage()
|
val input = decode.divSqrt.stage()
|
||||||
|
|
||||||
val aproxWidth = 8
|
val aproxWidth = 8
|
||||||
|
@ -889,7 +914,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
val add = new Area{
|
val add = p.withAdd generate new Area{
|
||||||
val input = decode.add.stage()
|
val input = decode.add.stage()
|
||||||
|
|
||||||
val shifter = new Area {
|
val shifter = new Area {
|
||||||
|
@ -982,7 +1007,12 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
|
|
||||||
val merge = new Area {
|
val merge = new Area {
|
||||||
//TODO maybe load can bypass merge and round.
|
//TODO maybe load can bypass merge and round.
|
||||||
val arbitrated = StreamArbiterFactory.lowerFirst.noLock.on(List(load.s1.output, add.output, mul.output, shortPip.rfOutput))
|
val inputs = ArrayBuffer[Stream[MergeInput]]()
|
||||||
|
inputs += load.s1.output
|
||||||
|
if(p.withAdd) (inputs += add.output)
|
||||||
|
if(p.withMul) (inputs += mul.output)
|
||||||
|
if(p.withShortPipMisc) (inputs += shortPip.rfOutput)
|
||||||
|
val arbitrated = StreamArbiterFactory.lowerFirst.noLock.on(inputs)
|
||||||
val isCommited = rf.lock.map(_.commited).read(arbitrated.lockId)
|
val isCommited = rf.lock.map(_.commited).read(arbitrated.lockId)
|
||||||
val commited = arbitrated.haltWhen(!isCommited).toFlow
|
val commited = arbitrated.haltWhen(!isCommited).toFlow
|
||||||
}
|
}
|
||||||
|
@ -1094,22 +1124,23 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val port = rf.ram.writePort
|
val port = rf.ram.writePort
|
||||||
port.valid := input.valid && rf.lock.map(_.write).read(input.lockId)
|
port.valid := input.valid && rf.lock.map(_.write).read(input.lockId)
|
||||||
port.address := input.source @@ input.rd
|
port.address := input.source @@ input.rd
|
||||||
port.data := input.value
|
port.data.value := input.value
|
||||||
|
if(p.withDouble) port.data.f32 := ???
|
||||||
|
|
||||||
val randomSim = p.sim generate (in UInt(p.internalMantissaSize bits))
|
val randomSim = p.sim generate (in UInt(p.internalMantissaSize bits))
|
||||||
if(p.sim) when(port.data.isZero || port.data.isInfinity){
|
if(p.sim) when(port.data.value.isZero || port.data.value.isInfinity){
|
||||||
port.data.mantissa := randomSim
|
port.data.value.mantissa := randomSim
|
||||||
}
|
}
|
||||||
if(p.sim) when(input.value.special){
|
if(p.sim) when(input.value.special){
|
||||||
port.data.exponent(p.internalExponentSize-1 downto 3) := randomSim.resized
|
port.data.value.exponent(p.internalExponentSize-1 downto 3) := randomSim.resized
|
||||||
when(!input.value.isNan){
|
when(!input.value.isNan){
|
||||||
port.data.exponent(2 downto 2) := randomSim.resized
|
port.data.value.exponent(2 downto 2) := randomSim.resized
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
when(port.valid){
|
when(port.valid){
|
||||||
assert(!(port.data.exponent === 0 && !port.data.special), "Special violation")
|
assert(!(port.data.value.exponent === 0 && !port.data.value.special), "Special violation")
|
||||||
assert(!(port.data.exponent === port.data.exponent.maxValue && !port.data.special), "Special violation")
|
assert(!(port.data.value.exponent === port.data.value.exponent.maxValue && !port.data.value.special), "Special violation")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1191,7 +1222,6 @@ object FpuSynthesisBench extends App{
|
||||||
"32",
|
"32",
|
||||||
portCount = 1,
|
portCount = 1,
|
||||||
FpuParameter(
|
FpuParameter(
|
||||||
internalMantissaSize = 23,
|
|
||||||
withDouble = false
|
withDouble = false
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
|
@ -108,10 +108,14 @@ object FpuRoundModeInstr extends SpinalEnum(){
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
case class FpuParameter( internalMantissaSize : Int,
|
case class FpuParameter( withDouble : Boolean,
|
||||||
withDouble : Boolean,
|
sim : Boolean = false,
|
||||||
sim : Boolean = false){
|
withAdd : Boolean = true,
|
||||||
|
withMul : Boolean = true,
|
||||||
|
withDivSqrt : Boolean = true,
|
||||||
|
withShortPipMisc : Boolean = true){
|
||||||
|
|
||||||
|
val internalMantissaSize = if(withDouble) 52 else 23
|
||||||
val storeLoadType = HardType(Bits(if(withDouble) 64 bits else 32 bits))
|
val storeLoadType = HardType(Bits(if(withDouble) 64 bits else 32 bits))
|
||||||
val internalExponentSize = (if(withDouble) 11 else 8) + 1
|
val internalExponentSize = (if(withDouble) 11 else 8) + 1
|
||||||
val internalFloating = HardType(FpuFloat(exponentSize = internalExponentSize, mantissaSize = internalMantissaSize))
|
val internalFloating = HardType(FpuFloat(exponentSize = internalExponentSize, mantissaSize = internalMantissaSize))
|
||||||
|
|
|
@ -32,8 +32,11 @@ class FpuTest extends FunSuite{
|
||||||
test("directed"){
|
test("directed"){
|
||||||
val portCount = 1
|
val portCount = 1
|
||||||
val p = FpuParameter(
|
val p = FpuParameter(
|
||||||
internalMantissaSize = 23,
|
|
||||||
withDouble = false,
|
withDouble = false,
|
||||||
|
// withAdd = false,
|
||||||
|
// withMul = false,
|
||||||
|
// withDivSqrt = false,
|
||||||
|
// withShortPipMisc = true
|
||||||
sim = true
|
sim = true
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -866,9 +869,21 @@ class FpuTest extends FunSuite{
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
for(_ <- 0 until 10000) testTransfer()
|
||||||
|
println("f32 load/store/rf transfer done")
|
||||||
|
|
||||||
|
for(_ <- 0 until 10000) testF2ui()
|
||||||
|
for(_ <- 0 until 10000) testF2i()
|
||||||
|
println("f2i done")
|
||||||
|
|
||||||
|
for(_ <- 0 until 10000) testUI2f()
|
||||||
|
for(_ <- 0 until 10000) testI2f()
|
||||||
|
println("i2f done")
|
||||||
|
|
||||||
|
|
||||||
|
// waitUntil(cmdQueue.isEmpty)
|
||||||
|
// dut.clockDomain.waitSampling(1000)
|
||||||
|
// simSuccess()
|
||||||
|
|
||||||
|
|
||||||
for(i <- 0 until 1000) testFma()
|
for(i <- 0 until 1000) testFma()
|
||||||
|
@ -886,10 +901,6 @@ class FpuTest extends FunSuite{
|
||||||
for(_ <- 0 until 10000) testEq()
|
for(_ <- 0 until 10000) testEq()
|
||||||
println("Cmp done")
|
println("Cmp done")
|
||||||
|
|
||||||
for(_ <- 0 until 10000) testF2ui()
|
|
||||||
for(_ <- 0 until 10000) testF2i()
|
|
||||||
|
|
||||||
println("f2i done")
|
|
||||||
|
|
||||||
for(_ <- 0 until 10000) testDiv()
|
for(_ <- 0 until 10000) testDiv()
|
||||||
println("f32 div done")
|
println("f32 div done")
|
||||||
|
@ -900,9 +911,6 @@ class FpuTest extends FunSuite{
|
||||||
for(_ <- 0 until 10000) testSgnj()
|
for(_ <- 0 until 10000) testSgnj()
|
||||||
println("f32 sgnj done")
|
println("f32 sgnj done")
|
||||||
|
|
||||||
for(_ <- 0 until 10000) testTransfer()
|
|
||||||
println("f32 load/store/rf transfer done")
|
|
||||||
|
|
||||||
|
|
||||||
for(_ <- 0 until 10000) testClass()
|
for(_ <- 0 until 10000) testClass()
|
||||||
println("f32 class done")
|
println("f32 class done")
|
||||||
|
@ -913,9 +921,6 @@ class FpuTest extends FunSuite{
|
||||||
println("minMax done")
|
println("minMax done")
|
||||||
|
|
||||||
|
|
||||||
for(_ <- 0 until 10000) testUI2f()
|
|
||||||
for(_ <- 0 until 10000) testI2f()
|
|
||||||
println("i2f done")
|
|
||||||
|
|
||||||
testBinaryOp(mul,1.469368E-39f, 7.9999995f, 1.17549435E-38f,3, FpuRoundMode.RUP,"mul")
|
testBinaryOp(mul,1.469368E-39f, 7.9999995f, 1.17549435E-38f,3, FpuRoundMode.RUP,"mul")
|
||||||
testBinaryOp(mul,1.1753509E-38f, 1.0001221f, 1.17549435E-38f ,1, FpuRoundMode.RUP,"mul")
|
testBinaryOp(mul,1.1753509E-38f, 1.0001221f, 1.17549435E-38f ,1, FpuRoundMode.RUP,"mul")
|
||||||
|
|
Loading…
Reference in New Issue