fpu f64 wip
This commit is contained in:
parent
889cc5fde2
commit
88dffc21f7
|
@ -218,7 +218,6 @@ object TestsWorkspace {
|
|||
config.plugins += new FpuPlugin(
|
||||
externalFpu = false,
|
||||
p = FpuParameter(
|
||||
internalMantissaSize = 23,
|
||||
withDouble = false
|
||||
)
|
||||
)
|
||||
|
|
|
@ -30,6 +30,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
|||
val rd = p.rfAddress()
|
||||
val arg = p.Arg()
|
||||
val roundMode = FpuRoundMode()
|
||||
val format = p.withDouble generate FpuFormat()
|
||||
}
|
||||
|
||||
case class RfReadOutput() extends Bundle{
|
||||
|
@ -40,6 +41,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
|||
val rd = p.rfAddress()
|
||||
val arg = p.Arg()
|
||||
val roundMode = FpuRoundMode()
|
||||
val format = p.withDouble generate FpuFormat()
|
||||
}
|
||||
|
||||
|
||||
|
@ -50,6 +52,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
|||
val i2f = Bool()
|
||||
val arg = Bits(2 bits)
|
||||
val roundMode = FpuRoundMode()
|
||||
val format = p.withDouble generate FpuFormat()
|
||||
}
|
||||
|
||||
case class ShortPipInput() extends Bundle{
|
||||
|
@ -61,6 +64,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
|||
val value = Bits(32 bits)
|
||||
val arg = Bits(2 bits)
|
||||
val roundMode = FpuRoundMode()
|
||||
val format = p.withDouble generate FpuFormat()
|
||||
}
|
||||
|
||||
case class MulInput() extends Bundle{
|
||||
|
@ -72,6 +76,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
|||
val divSqrt = Bool()
|
||||
val msb1, msb2 = Bool() //allow usage of msb bits of mul
|
||||
val roundMode = FpuRoundMode()
|
||||
val format = p.withDouble generate FpuFormat()
|
||||
}
|
||||
|
||||
|
||||
|
@ -82,6 +87,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
|||
val lockId = lockIdType()
|
||||
val div = Bool()
|
||||
val roundMode = FpuRoundMode()
|
||||
val format = p.withDouble generate FpuFormat()
|
||||
}
|
||||
|
||||
|
||||
|
@ -91,6 +97,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
|||
val rd = p.rfAddress()
|
||||
val lockId = lockIdType()
|
||||
val roundMode = FpuRoundMode()
|
||||
val format = p.withDouble generate FpuFormat()
|
||||
}
|
||||
|
||||
|
||||
|
@ -101,6 +108,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
|||
val value = p.writeFloating()
|
||||
val scrap = Bool()
|
||||
val roundMode = FpuRoundMode()
|
||||
val format = p.withDouble generate FpuFormat()
|
||||
}
|
||||
|
||||
case class RoundOutput() extends Bundle{
|
||||
|
@ -111,7 +119,11 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
|||
}
|
||||
|
||||
val rf = new Area{
|
||||
val ram = Mem(p.internalFloating, 32*portCount)
|
||||
// Element type stored in the register-file `ram`: the internal floating value plus an
// optional f32 flag.
case class Entry() extends Bundle{
|
||||
val value = p.internalFloating()
|
||||
// Only generated when p.withDouble is true.
// NOTE(review): presumably marks an entry that holds a single-precision value -- confirm.
val f32 = p.withDouble generate Bool()
|
||||
}
|
||||
val ram = Mem(Entry(), 32*portCount)
|
||||
val lock = for(i <- 0 until rfLockCount) yield new Area{
|
||||
val valid = RegInit(False)
|
||||
val source = Reg(Source())
|
||||
|
@ -219,15 +231,19 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
|||
val s1 = s0.haltWhen(hazard || !rf.lockFree).m2sPipe()
|
||||
val output = s1.swapPayload(RfReadOutput())
|
||||
val s1LockId = RegNextWhen(OHToUInt(rf.lockFreeId), !output.isStall)
|
||||
val rs1Entry = rf.ram.readSync(s0.source @@ s0.rs1,enable = !output.isStall)
|
||||
val rs2Entry = rf.ram.readSync(s0.source @@ s0.rs2,enable = !output.isStall)
|
||||
val rs3Entry = rf.ram.readSync(s0.source @@ s0.rs3,enable = !output.isStall)
|
||||
output.source := s1.source
|
||||
output.opcode := s1.opcode
|
||||
output.lockId := s1LockId
|
||||
output.arg := s1.arg
|
||||
output.roundMode := s1.roundMode
|
||||
output.rd := s1.rd
|
||||
output.rs1 := rf.ram.readSync(s0.source @@ s0.rs1,enable = !output.isStall)
|
||||
output.rs2 := rf.ram.readSync(s0.source @@ s0.rs2,enable = !output.isStall)
|
||||
output.rs3 := rf.ram.readSync(s0.source @@ s0.rs3,enable = !output.isStall)
|
||||
if(p.withDouble) output.format := s1.format
|
||||
output.rs1 := rs1Entry.value
|
||||
output.rs2 := rs2Entry.value
|
||||
output.rs3 := rs3Entry.value
|
||||
}
|
||||
|
||||
val decode = new Area{
|
||||
|
@ -249,44 +265,53 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
|||
|
||||
val divSqrtHit = input.opcode === p.Opcode.DIV || input.opcode === p.Opcode.SQRT
|
||||
val divSqrt = Stream(DivSqrtInput())
|
||||
input.ready setWhen(divSqrtHit && divSqrt.ready)
|
||||
divSqrt.valid := input.valid && divSqrtHit
|
||||
divSqrt.payload.assignSomeByName(read.output.payload)
|
||||
divSqrt.div := input.opcode === p.Opcode.DIV
|
||||
// DIV/SQRT dispatch wiring, elaborated only when p.withDivSqrt is set.
if(p.withDivSqrt) {
|
||||
// Accept the instruction once it targets this unit and the divSqrt stream is ready.
input.ready setWhen (divSqrtHit && divSqrt.ready)
|
||||
divSqrt.valid := input.valid && divSqrtHit
|
||||
// Fill the payload from the read-stage output by matching field names.
divSqrt.payload.assignSomeByName(read.output.payload)
|
||||
// True for DIV; since divSqrtHit only covers DIV and SQRT, false here means SQRT.
divSqrt.div := input.opcode === p.Opcode.DIV
|
||||
}
|
||||
|
||||
val fmaHit = input.opcode === p.Opcode.FMA
|
||||
val mulHit = input.opcode === p.Opcode.MUL || fmaHit
|
||||
val mul = Stream(MulInput())
|
||||
val divSqrtToMul = Stream(MulInput())
|
||||
|
||||
input.ready setWhen(mulHit && mul.ready && !divSqrtToMul.valid)
|
||||
mul.valid := input.valid && mulHit || divSqrtToMul.valid
|
||||
if(p.withMul) {
|
||||
input.ready setWhen (mulHit && mul.ready && !divSqrtToMul.valid)
|
||||
mul.valid := input.valid && mulHit || divSqrtToMul.valid
|
||||
|
||||
divSqrtToMul.ready := mul.ready
|
||||
mul.payload := divSqrtToMul.payload
|
||||
when(!divSqrtToMul.valid) {
|
||||
mul.payload.assignSomeByName(read.output.payload)
|
||||
mul.add := fmaHit
|
||||
mul.divSqrt := False
|
||||
mul.msb1 := True
|
||||
mul.msb2 := True
|
||||
mul.rs2.sign.allowOverride(); mul.rs2.sign := read.output.rs2.sign ^ input.arg(0)
|
||||
mul.rs3.sign.allowOverride(); mul.rs3.sign := read.output.rs3.sign ^ input.arg(1)
|
||||
divSqrtToMul.ready := mul.ready
|
||||
mul.payload := divSqrtToMul.payload
|
||||
when(!divSqrtToMul.valid) {
|
||||
mul.payload.assignSomeByName(read.output.payload)
|
||||
mul.add := fmaHit
|
||||
mul.divSqrt := False
|
||||
mul.msb1 := True
|
||||
mul.msb2 := True
|
||||
mul.rs2.sign.allowOverride();
|
||||
mul.rs2.sign := read.output.rs2.sign ^ input.arg(0)
|
||||
mul.rs3.sign.allowOverride();
|
||||
mul.rs3.sign := read.output.rs3.sign ^ input.arg(1)
|
||||
}
|
||||
}
|
||||
|
||||
val addHit = input.opcode === p.Opcode.ADD
|
||||
val add = Stream(AddInput())
|
||||
val mulToAdd = Stream(AddInput())
|
||||
|
||||
input.ready setWhen(addHit && add.ready && !mulToAdd.valid)
|
||||
add.valid := input.valid && addHit || mulToAdd.valid
|
||||
|
||||
if(p.withAdd) {
|
||||
input.ready setWhen (addHit && add.ready && !mulToAdd.valid)
|
||||
add.valid := input.valid && addHit || mulToAdd.valid
|
||||
|
||||
mulToAdd.ready := add.ready
|
||||
add.payload := mulToAdd.payload
|
||||
when(!mulToAdd.valid) {
|
||||
add.payload.assignSomeByName(read.output.payload)
|
||||
add.rs2.sign.allowOverride; add.rs2.sign := read.output.rs2.sign ^ input.arg(0)
|
||||
mulToAdd.ready := add.ready
|
||||
add.payload := mulToAdd.payload
|
||||
when(!mulToAdd.valid) {
|
||||
add.payload.assignSomeByName(read.output.payload)
|
||||
add.rs2.sign.allowOverride;
|
||||
add.rs2.sign := read.output.rs2.sign ^ input.arg(0)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -629,7 +654,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
|||
}
|
||||
}
|
||||
|
||||
val mul = new Area{
|
||||
val mul = p.withMul generate new Area{
|
||||
val input = decode.mul.stage()
|
||||
|
||||
val math = new Area {
|
||||
|
@ -707,7 +732,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
|||
input.ready := (input.add ? decode.mulToAdd.ready | output.ready) || input.divSqrt
|
||||
}
|
||||
|
||||
val divSqrt = new Area {
|
||||
val divSqrt = p.withDivSqrt generate new Area {
|
||||
val input = decode.divSqrt.stage()
|
||||
|
||||
val aproxWidth = 8
|
||||
|
@ -889,7 +914,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
|||
}
|
||||
}
|
||||
|
||||
val add = new Area{
|
||||
val add = p.withAdd generate new Area{
|
||||
val input = decode.add.stage()
|
||||
|
||||
val shifter = new Area {
|
||||
|
@ -982,7 +1007,12 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
|||
|
||||
// Merge stage: arbitrates the unit outputs into a single stream and holds it until the
// register-file lock it targets is marked commited.
val merge = new Area {
|
||||
//TODO maybe load can bypass merge and round.
|
||||
val arbitrated = StreamArbiterFactory.lowerFirst.noLock.on(List(load.s1.output, add.output, mul.output, shortPip.rfOutput))
|
||||
// Collect the streams to arbitrate: the load output is always present, the others only
// when the matching unit is enabled in p.
val inputs = ArrayBuffer[Stream[MergeInput]]()
|
||||
inputs += load.s1.output
|
||||
if(p.withAdd) (inputs += add.output)
|
||||
if(p.withMul) (inputs += mul.output)
|
||||
if(p.withShortPipMisc) (inputs += shortPip.rfOutput)
|
||||
val arbitrated = StreamArbiterFactory.lowerFirst.noLock.on(inputs)
|
||||
// Read the commited flag of the lock selected by the arbitrated payload's lockId.
val isCommited = rf.lock.map(_.commited).read(arbitrated.lockId)
|
||||
// Stall while that lock is not commited, then expose the result as a Flow.
val commited = arbitrated.haltWhen(!isCommited).toFlow
|
||||
}
|
||||
|
@ -1094,22 +1124,23 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
|||
val port = rf.ram.writePort
|
||||
port.valid := input.valid && rf.lock.map(_.write).read(input.lockId)
|
||||
port.address := input.source @@ input.rd
|
||||
port.data := input.value
|
||||
port.data.value := input.value
|
||||
// NOTE(review): `???` is Scala's Predef placeholder and throws NotImplementedError when
// evaluated, so this line fails as soon as p.withDouble is true. The source of the f32
// flag still has to be wired in before double support can be enabled.
if(p.withDouble) port.data.f32 := ???
|
||||
|
||||
val randomSim = p.sim generate (in UInt(p.internalMantissaSize bits))
|
||||
if(p.sim) when(port.data.isZero || port.data.isInfinity){
|
||||
port.data.mantissa := randomSim
|
||||
if(p.sim) when(port.data.value.isZero || port.data.value.isInfinity){
|
||||
port.data.value.mantissa := randomSim
|
||||
}
|
||||
if(p.sim) when(input.value.special){
|
||||
port.data.exponent(p.internalExponentSize-1 downto 3) := randomSim.resized
|
||||
port.data.value.exponent(p.internalExponentSize-1 downto 3) := randomSim.resized
|
||||
when(!input.value.isNan){
|
||||
port.data.exponent(2 downto 2) := randomSim.resized
|
||||
port.data.value.exponent(2 downto 2) := randomSim.resized
|
||||
}
|
||||
}
|
||||
|
||||
when(port.valid){
|
||||
assert(!(port.data.exponent === 0 && !port.data.special), "Special violation")
|
||||
assert(!(port.data.exponent === port.data.exponent.maxValue && !port.data.special), "Special violation")
|
||||
assert(!(port.data.value.exponent === 0 && !port.data.value.special), "Special violation")
|
||||
assert(!(port.data.value.exponent === port.data.value.exponent.maxValue && !port.data.value.special), "Special violation")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1191,7 +1222,6 @@ object FpuSynthesisBench extends App{
|
|||
"32",
|
||||
portCount = 1,
|
||||
FpuParameter(
|
||||
internalMantissaSize = 23,
|
||||
withDouble = false
|
||||
)
|
||||
)
|
||||
|
|
|
@ -108,10 +108,14 @@ object FpuRoundModeInstr extends SpinalEnum(){
|
|||
}
|
||||
|
||||
|
||||
case class FpuParameter( internalMantissaSize : Int,
|
||||
withDouble : Boolean,
|
||||
sim : Boolean = false){
|
||||
case class FpuParameter( withDouble : Boolean,
|
||||
sim : Boolean = false,
|
||||
withAdd : Boolean = true,
|
||||
withMul : Boolean = true,
|
||||
withDivSqrt : Boolean = true,
|
||||
withShortPipMisc : Boolean = true){
|
||||
|
||||
val internalMantissaSize = if(withDouble) 52 else 23
|
||||
val storeLoadType = HardType(Bits(if(withDouble) 64 bits else 32 bits))
|
||||
val internalExponentSize = (if(withDouble) 11 else 8) + 1
|
||||
val internalFloating = HardType(FpuFloat(exponentSize = internalExponentSize, mantissaSize = internalMantissaSize))
|
||||
|
|
|
@ -32,8 +32,11 @@ class FpuTest extends FunSuite{
|
|||
test("directed"){
|
||||
val portCount = 1
|
||||
val p = FpuParameter(
|
||||
internalMantissaSize = 23,
|
||||
withDouble = false,
|
||||
// withAdd = false,
|
||||
// withMul = false,
|
||||
// withDivSqrt = false,
|
||||
// withShortPipMisc = true
|
||||
sim = true
|
||||
)
|
||||
|
||||
|
@ -866,9 +869,21 @@ class FpuTest extends FunSuite{
|
|||
|
||||
|
||||
|
||||
for(_ <- 0 until 10000) testTransfer()
|
||||
println("f32 load/store/rf transfer done")
|
||||
|
||||
for(_ <- 0 until 10000) testF2ui()
|
||||
for(_ <- 0 until 10000) testF2i()
|
||||
println("f2i done")
|
||||
|
||||
for(_ <- 0 until 10000) testUI2f()
|
||||
for(_ <- 0 until 10000) testI2f()
|
||||
println("i2f done")
|
||||
|
||||
|
||||
|
||||
// waitUntil(cmdQueue.isEmpty)
|
||||
// dut.clockDomain.waitSampling(1000)
|
||||
// simSuccess()
|
||||
|
||||
|
||||
for(i <- 0 until 1000) testFma()
|
||||
|
@ -886,10 +901,6 @@ class FpuTest extends FunSuite{
|
|||
for(_ <- 0 until 10000) testEq()
|
||||
println("Cmp done")
|
||||
|
||||
for(_ <- 0 until 10000) testF2ui()
|
||||
for(_ <- 0 until 10000) testF2i()
|
||||
|
||||
println("f2i done")
|
||||
|
||||
for(_ <- 0 until 10000) testDiv()
|
||||
println("f32 div done")
|
||||
|
@ -900,9 +911,6 @@ class FpuTest extends FunSuite{
|
|||
for(_ <- 0 until 10000) testSgnj()
|
||||
println("f32 sgnj done")
|
||||
|
||||
for(_ <- 0 until 10000) testTransfer()
|
||||
println("f32 load/store/rf transfer done")
|
||||
|
||||
|
||||
for(_ <- 0 until 10000) testClass()
|
||||
println("f32 class done")
|
||||
|
@ -913,9 +921,6 @@ class FpuTest extends FunSuite{
|
|||
println("minMax done")
|
||||
|
||||
|
||||
for(_ <- 0 until 10000) testUI2f()
|
||||
for(_ <- 0 until 10000) testI2f()
|
||||
println("i2f done")
|
||||
|
||||
testBinaryOp(mul,1.469368E-39f, 7.9999995f, 1.17549435E-38f,3, FpuRoundMode.RUP,"mul")
|
||||
testBinaryOp(mul,1.1753509E-38f, 1.0001221f, 1.17549435E-38f ,1, FpuRoundMode.RUP,"mul")
|
||||
|
|
Loading…
Reference in New Issue