diff --git a/src/main/scala/vexriscv/ip/fpu/FpuCore.scala b/src/main/scala/vexriscv/ip/fpu/FpuCore.scala index 8209c11..c64ddb3 100644 --- a/src/main/scala/vexriscv/ip/fpu/FpuCore.scala +++ b/src/main/scala/vexriscv/ip/fpu/FpuCore.scala @@ -15,6 +15,8 @@ case class FpuCore(p : FpuParameter) extends Component{ val port = slave(FpuPort(p)) } + +// val commitPerSourceCount = 8 val rfLockCount = 5 val lockIdType = HardType(UInt(log2Up(rfLockCount) bits)) @@ -89,11 +91,44 @@ case class FpuCore(p : FpuParameter) extends Component{ val valid = RegInit(False) val source = Reg(p.source) val address = Reg(p.rfAddress) + val id = Reg(UInt(log2Up(rfLockCount) bits)) + val commited = Reg(Bool) + val write = Reg(Bool) } val lockFree = !lock.map(_.valid).andR val lockFreeId = OHMasking.first(lock.map(!_.valid)) } + val commitLogic = for(source <- 0 until p.sourceCount) yield new Area{ + val fire = False + val target, hit = Reg(UInt(log2Up(rfLockCount) bits)) init(0) + when(fire){ + hit := hit + 1 + } + + io.port.commit(source).ready := False + when(io.port.commit(source).valid) { + for (lock <- rf.lock) { + when(lock.valid && lock.source === source && lock.id === hit) { + fire := True + lock.commited := True + lock.write := io.port.commit(source).write + io.port.commit(source).ready := True + } + } + } + } + +// case class CommitLine() extends Bundle{ +// val valid = Bool() +// val write = Bool() +// } +// val commits = for(i <- 0 until p.sourceCount) yield new Area{ +// val lines = Vec(CommitLine(), commitPerSourceCount) +// lines.foreach(_.valid init(False)) +// +// } + val read = new Area{ val s0 = Stream(RfReadInput()) s0.arbitrationFrom(io.port.cmd) @@ -137,11 +172,17 @@ case class FpuCore(p : FpuParameter) extends Component{ val hits = List((useRs1, s0.rs1), (useRs2, s0.rs2), (useRs3, s0.rs3), (useRd, s0.rd)).map{case (use, reg) => use && rf.lock.map(l => l.valid && l.source === s0.source && l.address === reg).orR} val hazard = hits.orR when(s0.fire && useRd){ + for(i <- 0 until p.sourceCount){ + when(s0.source === i){ + commitLogic(i).target := commitLogic(i).target + 1 + } + } for(i <- 0 until rfLockCount){ when(rf.lockFreeId(i)){ rf.lock(i).valid := True rf.lock(i).source := s0.source rf.lock(i).address := s0.rd + rf.lock(i).id := commitLogic.map(_.target).read(s0.source) } } } @@ -224,9 +265,16 @@ case class FpuCore(p : FpuParameter) extends Component{ } val load = new Area{ - def input = decode.load - - val output = input.stage() + val input = decode.load.stage() + def feed = io.port.load(input.source) + val hazard = !feed.valid + val output = input.haltWhen(hazard).swapPayload(WriteInput()) + io.port.load.foreach(_.ready := False) + feed.ready := input.valid && output.ready + output.source := input.source + output.lockId := input.lockId + output.rd := input.rd + output.value.assignFromBits(feed.value) } @@ -506,47 +554,20 @@ case class FpuCore(p : FpuParameter) extends Component{ val write = new Area{ - val port = rf.ram.writePort - port.valid := False - port.payload.assignDontCare() + val arbitrated = StreamArbiterFactory.lowerFirst.noLock.on(List(load.output, add.output, mul.output)) + val isCommited = rf.lock.map(_.commited).read(arbitrated.lockId) + val commited = arbitrated.haltWhen(!isCommited).toFlow - - val lockFree = Flow(lockIdType) - lockFree.valid := port.fire - lockFree.payload.assignDontCare() - - load.output.ready := False - mul.output.ready := False - add.output.ready := True - io.port.commit.ready := False - when(add.output.valid) { - port.valid := True - port.address := add.output.source @@ add.output.rd - port.data := add.output.value - - lockFree.payload := add.output.lockId - } elsewhen(mul.output.valid) { - port.valid := True - port.address := mul.output.source @@ mul.output.rd - port.data := mul.output.value - - mul.output.ready := True - lockFree.payload := mul.output.lockId - } elsewhen(load.output.valid && io.port.commit.valid) { - port.valid := io.port.commit.write - port.address := load.output.source @@ load.output.rd - port.data.assignFromBits(io.port.commit.value) - - load.output.ready := True - io.port.commit.ready := True - lockFree.payload := load.output.lockId - } - - when(lockFree.fire){ - for(i <- 0 until rfLockCount) when(lockFree.payload === i){ + when(commited.valid){ + for(i <- 0 until rfLockCount) when(commited.lockId === i){ rf.lock(i).valid := False } } + + val port = rf.ram.writePort + port.valid := commited.valid && rf.lock.map(_.write).read(commited.lockId) + port.address := commited.source @@ commited.rd + port.data := commited.value } } @@ -572,7 +593,7 @@ object FpuSynthesisBench extends App{ FpuParameter( internalMantissaSize = 23, withDouble = false, - sourceWidth = 0 + sourceCount = 1 ) ) rtls += new Fpu( @@ -580,7 +601,7 @@ object FpuSynthesisBench extends App{ FpuParameter( internalMantissaSize = 52, withDouble = true, - sourceWidth = 0 + sourceCount = 1 ) ) diff --git a/src/main/scala/vexriscv/ip/fpu/Interface.scala b/src/main/scala/vexriscv/ip/fpu/Interface.scala index 2c5b3f1..00e526a 100644 --- a/src/main/scala/vexriscv/ip/fpu/Interface.scala +++ b/src/main/scala/vexriscv/ip/fpu/Interface.scala @@ -22,13 +22,18 @@ case class FpuFloat(exponentSize: Int, val sign = Bool } -case class FpuOpcode(p : FpuParameter) extends SpinalEnum{ +object FpuOpcode extends SpinalEnum{ val LOAD, STORE, MUL, ADD, FMA, I2F, F2I, CMP, DIV, SQRT = newElement() } +object FpuFormat extends SpinalEnum{ + val FLOAT, DOUBLE = newElement() +} + + case class FpuParameter( internalMantissaSize : Int, withDouble : Boolean, - sourceWidth : Int){ + sourceCount : Int){ val storeLoadType = HardType(Bits(if(withDouble) 64 bits else 32 bits)) val internalExponentSize = if(withDouble) 11 else 8 @@ -37,11 +42,9 @@ case class FpuParameter( internalMantissaSize : Int, val source = HardType(UInt(sourceWidth bits)) val rfAddress = HardType(UInt(5 bits)) - val Opcode = new FpuOpcode(this) - val Format = new SpinalEnum{ - val FLOAT = newElement() - val DOUBLE = withDouble generate newElement() - } + val Opcode = FpuOpcode + val Format = FpuFormat + val sourceWidth = log2Up(sourceCount) } case class FpuCmd(p : FpuParameter) extends Bundle{ @@ -55,9 +58,11 @@ case class FpuCmd(p : FpuParameter) extends Bundle{ } case class FpuCommit(p : FpuParameter) extends Bundle{ - val source = UInt(p.sourceWidth bits) val write = Bool() - val value = p.storeLoadType() // IEEE 754 load +} + +case class FpuLoad(p : FpuParameter) extends Bundle{ + val value = p.storeLoadType() // IEEE 754 } case class FpuRsp(p : FpuParameter) extends Bundle{ @@ -67,11 +72,13 @@ case class FpuRsp(p : FpuParameter) extends Bundle{ case class FpuPort(p : FpuParameter) extends Bundle with IMasterSlave { val cmd = Stream(FpuCmd(p)) - val commit = Stream(FpuCommit(p)) + val commit = Vec(Stream(FpuCommit(p)), p.sourceCount) + val load = Vec(Stream(FpuLoad(p)), p.sourceCount) val rsp = Stream(FpuRsp(p)) override def asMaster(): Unit = { - master(cmd, commit) + master(cmd) + (commit ++ load).foreach(master(_)) slave(rsp) } } diff --git a/src/test/scala/vexriscv/ip/fpu/FpuTest.scala b/src/test/scala/vexriscv/ip/fpu/FpuTest.scala index 0ca5ca7..3190a0c 100644 --- a/src/test/scala/vexriscv/ip/fpu/FpuTest.scala +++ b/src/test/scala/vexriscv/ip/fpu/FpuTest.scala @@ -19,7 +19,7 @@ class FpuTest extends FunSuite{ val p = FpuParameter( internalMantissaSize = 23, withDouble = false, - sourceWidth = 0 + sourceCount = 1 ) SimConfig.withFstWave.compile(new FpuCore(p)).doSim(seed = 42){ dut => @@ -31,8 +31,23 @@ class FpuTest extends FunSuite{ val cpus = for(id <- 0 until 1 << p.sourceWidth) yield new { val cmdQueue = mutable.Queue[FpuCmd => Unit]() val commitQueue = mutable.Queue[FpuCommit => Unit]() + val loadQueue = mutable.Queue[FpuLoad => Unit]() val rspQueue = mutable.Queue[FpuRsp => Unit]() + StreamDriver(dut.io.port.commit(id) ,dut.clockDomain){payload => + if(commitQueue.isEmpty) false else { + commitQueue.dequeue().apply(payload) + true + } + } + + StreamDriver(dut.io.port.load(id) ,dut.clockDomain){payload => + if(loadQueue.isEmpty) false else { + loadQueue.dequeue().apply(payload) + true + } + } + def loadRaw(rd : Int, value : BigInt): Unit ={ cmdQueue += {cmd => cmd.source #= id @@ -44,8 +59,9 @@ class FpuTest extends FunSuite{ cmd.rd #= rd } commitQueue += {cmd => - cmd.source #= id cmd.write #= true + } + loadQueue += {cmd => cmd.value #= value } } @@ -82,6 +98,9 @@ class FpuTest extends FunSuite{ cmd.rs3.randomize() cmd.rd #= rd } + commitQueue += {cmd => + cmd.write #= true + } } def add(rd : Int, rs1 : Int, rs2 : Int): Unit ={ @@ -94,6 +113,9 @@ class FpuTest extends FunSuite{ cmd.rs3.randomize() cmd.rd #= rd } + commitQueue += {cmd => + cmd.write #= true + } } def div(rd : Int, rs1 : Int, rs2 : Int): Unit ={ @@ -106,6 +128,9 @@ class FpuTest extends FunSuite{ cmd.rs3.randomize() cmd.rd #= rd } + commitQueue += {cmd => + cmd.write #= true + } } def sqrt(rd : Int, rs1 : Int): Unit ={ @@ -118,6 +143,9 @@ class FpuTest extends FunSuite{ cmd.rs3.randomize() cmd.rd #= rd } + commitQueue += {cmd => + cmd.write #= true + } } def fma(rd : Int, rs1 : Int, rs2 : Int, rs3 : Int): Unit ={ @@ -130,6 +158,9 @@ class FpuTest extends FunSuite{ cmd.rs3 #= rs3 cmd.rd #= rd } + commitQueue += {cmd => + cmd.write #= true + } } } @@ -143,15 +174,7 @@ class FpuTest extends FunSuite{ } } - StreamDriver(dut.io.port.commit ,dut.clockDomain){payload => - cpus.map(_.commitQueue).filter(_.nonEmpty).toSeq match { - case Nil => false - case l => { - l.randomPick().dequeue().apply(payload) - true - } - } - } + StreamMonitor(dut.io.port.rsp, dut.clockDomain){payload => @@ -272,6 +295,7 @@ class FpuTest extends FunSuite{ val b2f = lang.Float.intBitsToFloat(_) + testAdd(0.1f, 1.6f) testSqrt(1.5625f) testSqrt(1.5625f*2) @@ -289,7 +313,6 @@ class FpuTest extends FunSuite{ // dut.clockDomain.waitSampling(1000) // simFailure() - testAdd(0.1f, 1.6f) testMul(0.1f, 1.6f) testFma(1.1f, 2.2f, 3.0f) testDiv(1.0f, 1.1f)