Got litex SMP cluster to work on FPGA

This commit is contained in:
Dolu1990 2020-05-01 11:14:52 +02:00
parent dc0da9662a
commit f5f30615ba
2 changed files with 292 additions and 78 deletions

View file

@ -249,11 +249,11 @@ object VexRiscvSmpClusterGen {
if(hartId == 0) config.plugins += new DebugPlugin(null)
config
}
def vexRiscvCluster(cpuCount : Int) = VexRiscvSmpCluster(
def vexRiscvCluster(cpuCount : Int, resetVector : Long = 0x80000000l) = VexRiscvSmpCluster(
debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")),
p = VexRiscvSmpClusterParameter(
cpuConfigs = List.tabulate(cpuCount) {
vexRiscvConfig(_)
vexRiscvConfig(_, resetVector = resetVector)
}
)
)
@ -440,7 +440,10 @@ object VexRiscvSmpClusterTestInfrastructure{
import spinal.core.sim._
dut.clockDomain.forkStimulus(10)
dut.debugClockDomain.forkStimulus(10)
JtagTcp(dut.io.jtag, 100)
// JtagTcp(dut.io.jtag, 100)
dut.io.jtag.tck #= false
dut.io.jtag.tdi #= false
dut.io.jtag.tms #= false
}
}
@ -491,11 +494,17 @@ object VexRiscvSmpClusterOpenSbi extends App{
val cpuCount = 4
val withStall = false
simConfig.workspaceName("rawr_4c").compile(VexRiscvSmpClusterGen.vexRiscvCluster(cpuCount)).doSimUntilVoid(seed = 42){dut =>
simConfig.workspaceName("rawr_4c").compile(VexRiscvSmpClusterGen.vexRiscvCluster(cpuCount, resetVector = 0x80000000l)).doSimUntilVoid(seed = 42){dut =>
// dut.clockDomain.forkSimSpeedPrinter(1.0)
VexRiscvSmpClusterTestInfrastructure.init(dut)
val ram = VexRiscvSmpClusterTestInfrastructure.ram(dut, withStall)
// ram.memory.loadBin(0x80000000l, "../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_payload.bin")
// ram.memory.loadBin(0x40F00000l, "/media/data/open/litex_smp/litex_vexriscv_smp/images/fw_jump.bin")
// ram.memory.loadBin(0x40000000l, "/media/data/open/litex_smp/litex_vexriscv_smp/images/Image")
// ram.memory.loadBin(0x40EF0000l, "/media/data/open/litex_smp/litex_vexriscv_smp/images/dtb")
// ram.memory.loadBin(0x41000000l, "/media/data/open/litex_smp/litex_vexriscv_smp/images/rootfs.cpio")
ram.memory.loadBin(0x80000000l, "../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_jump.bin")
ram.memory.loadBin(0xC0000000l, "../buildroot/output/images/Image")
ram.memory.loadBin(0xC1000000l, "../buildroot/output/images/dtb")

View file

@ -5,14 +5,19 @@ import spinal.lib.bus.bmb._
import spinal.lib.bus.wishbone.{Wishbone, WishboneConfig, WishboneSlaveFactory}
import spinal.lib.com.jtag.Jtag
import spinal.lib._
import spinal.lib.bus.bmb.sim.{BmbMemoryMultiPort, BmbMemoryTester}
import spinal.lib.bus.misc.{AddressMapping, DefaultMapping, SizeMapping}
import spinal.lib.eda.bench.Bench
import spinal.lib.misc.Clint
import spinal.lib.sim.{SimData, SparseMemory, StreamDriver, StreamMonitor, StreamReadyRandomizer}
import vexriscv.demo.smp.VexRiscvLitexSmpClusterOpenSbi.{cpuCount, parameter}
import vexriscv.demo.smp.VexRiscvSmpClusterGen.vexRiscvConfig
import vexriscv.{VexRiscv, VexRiscvConfig}
import vexriscv.plugin.{CsrPlugin, DBusCachedPlugin, DebugPlugin, IBusCachedPlugin}
import scala.collection.mutable
import scala.util.Random
case class LiteDramNativeParameter(addressWidth : Int, dataWidth : Int)
case class LiteDramNativeCmd(p : LiteDramNativeParameter) extends Bundle{
@ -39,46 +44,195 @@ case class LiteDramNative(p : LiteDramNativeParameter) extends Bundle with IMast
slave(rdata)
}
def fromBmb(bmb : Bmb): Unit = new Area{
val resized = bmb.resize(p.dataWidth)
val unburstified = resized.unburstify()
case class Context() extends Bundle {
val context = Bits(unburstified.p.contextWidth bits)
val source = UInt(unburstified.p.sourceWidth bits)
val isWrite = Bool()
}
val (queueFork, cmdFork, dataFork) = StreamFork3(unburstified.cmd)
cmd.arbitrationFrom(cmdFork)
cmd.addr := (cmdFork.address >> log2Up(bmb.p.byteCount)).resized
cmd.we := cmdFork.isWrite
def fromBmb(bmb : Bmb, wdataFifoSize : Int, rdataFifoSize : Int) = {
val bridge = BmbToLiteDram(
bmbParameter = bmb.p,
liteDramParameter = this.p,
wdataFifoSize = wdataFifoSize,
rdataFifoSize = rdataFifoSize
)
bridge.io.input << bmb
bridge.io.output <> this
bridge
}
if(bmb.p.canWrite) {
wdata.arbitrationFrom(dataFork.throwWhen(dataFork.isRead))
wdata.data := cmdFork.data
wdata.we := cmdFork.mask
} else {
dataFork.ready := True
wdata.valid := False
wdata.data.assignDontCare()
wdata.we.assignDontCare()
def simSlave(ram : SparseMemory,cd : ClockDomain, bmb : Bmb = null): Unit ={
import spinal.core.sim._
def bus = this
case class Cmd(address : Long, we : Boolean)
case class WData(data : BigInt, we : Long)
val cmdQueue = mutable.Queue[Cmd]()
val wdataQueue = mutable.Queue[WData]()
val rdataQueue = mutable.Queue[BigInt]()
case class Ref(address : Long, data : BigInt, we : Long, time : Long)
val ref = mutable.Queue[Ref]()
if(bmb != null) StreamMonitor(bmb.cmd, cd){p =>
if(bmb.cmd.opcode.toInt == 1) ref.enqueue(Ref(p.fragment.address.toLong, p.fragment.data.toBigInt, p.fragment.mask.toLong, simTime()))
}
val cmdContext = Stream(Context())
cmdContext.arbitrationFrom(queueFork)
cmdContext.context := unburstified.cmd.context
cmdContext.source := unburstified.cmd.source
cmdContext.isWrite := unburstified.cmd.isWrite
var writeCmdCounter, writeDataCounter = 0
StreamReadyRandomizer(bus.cmd, cd)
StreamMonitor(bus.cmd, cd) { t =>
cmdQueue.enqueue(Cmd(t.addr.toLong * (p.dataWidth/8) , t.we.toBoolean))
if(t.we.toBoolean) writeCmdCounter += 1
}
val rspContext = cmdContext.queue(64)
StreamReadyRandomizer(bus.wdata, cd)
StreamMonitor(bus.wdata, cd) { p =>
writeDataCounter += 1
// if(p.data.toBigInt == BigInt("00000002000000020000000200000002",16)){
// println("ASD")
// }
wdataQueue.enqueue(WData(p.data.toBigInt, p.we.toLong))
}
rdata.ready := unburstified.rsp.fire && !rspContext.isWrite
rspContext.ready := unburstified.rsp.fire
unburstified.rsp.valid := rspContext.valid && (rspContext.isWrite || rdata.valid)
unburstified.rsp.setSuccess()
unburstified.rsp.last := True
unburstified.rsp.source := rspContext.source
unburstified.rsp.context := rspContext.context
unburstified.rsp.data := rdata.data
// new SimStreamAssert(cmd,cd)
// new SimStreamAssert(wdata,cd)
// new SimStreamAssert(rdata,cd)
cd.onSamplings{
if(writeDataCounter-writeCmdCounter > 2){
println("miaou")
}
if(cmdQueue.nonEmpty && Random.nextFloat() < 0.5){
val cmd = cmdQueue.head
if(cmd.we){
if(wdataQueue.nonEmpty){
// if(cmd.address == 0xc02ae850l) {
// println(s"! $writeCmdCounter $writeDataCounter")
// }
cmdQueue.dequeue()
val wdata = wdataQueue.dequeue()
val raw = wdata.data.toByteArray
val left = wdata.data.toByteArray.size-1
if(bmb != null){
assert(ref.nonEmpty)
assert((ref.head.address & 0xFFFFFFF0l) == cmd.address)
assert(ref.head.data == wdata.data)
assert(ref.head.we == wdata.we)
ref.dequeue()
}
// if(cmd.address == 0xc02ae850l) {
// println(s"$cmd $wdata ${simTime()}")
// }
for(i <- 0 until p.dataWidth/8){
if(((wdata.we >> i) & 1) != 0) {
// if(cmd.address == 0xc02ae850l) {
// println(s"W $i ${ if (left - i >= 0) raw(left - i) else 0}")
// }
ram.write(cmd.address + i, if (left - i >= 0) raw(left - i) else 0)
}
}
}
} else {
cmdQueue.dequeue()
val value = new Array[Byte](p.dataWidth/8+1)
val left = value.size-1
for(i <- 0 until p.dataWidth/8) {
value(left-i) = ram.read(cmd.address+i)
}
rdataQueue.enqueue(BigInt(value))
}
}
}
StreamDriver(bus.rdata, cd){ p =>
if(rdataQueue.isEmpty){
false
} else {
p.data #= rdataQueue.dequeue()
true
}
}
}
}
case class BmbToLiteDram(bmbParameter : BmbParameter,
liteDramParameter : LiteDramNativeParameter,
wdataFifoSize : Int,
rdataFifoSize : Int) extends Component{
val io = new Bundle {
val input = slave(Bmb(bmbParameter))
val output = master(LiteDramNative(liteDramParameter))
}
val resized = io.input.resize(liteDramParameter.dataWidth)
val unburstified = resized.unburstify()
case class Context() extends Bundle {
val context = Bits(unburstified.p.contextWidth bits)
val source = UInt(unburstified.p.sourceWidth bits)
val isWrite = Bool()
}
assert(isPow2(rdataFifoSize))
val pendingRead = Reg(UInt(log2Up(rdataFifoSize) + 1 bits)) init(0)
val halt = Bool()
val (cmdFork, dataFork) = StreamFork2(unburstified.cmd.haltWhen(halt))
io.output.cmd.arbitrationFrom(cmdFork.haltWhen(pendingRead.msb))
io.output.cmd.addr := (cmdFork.address >> log2Up(liteDramParameter.dataWidth/8)).resized
io.output.cmd.we := cmdFork.isWrite
if(bmbParameter.canWrite) {
val fifo = dataFork.throwWhen(dataFork.isRead).queue(wdataFifoSize)
io.output.wdata.arbitrationFrom(fifo)
io.output.wdata.data := fifo.data
io.output.wdata.we := fifo.mask
} else {
dataFork.ready := True
io.output.wdata.valid := False
io.output.wdata.data.assignDontCare()
io.output.wdata.we.assignDontCare()
}
val cmdContext = Stream(Context())
cmdContext.valid := unburstified.cmd.fire
cmdContext.context := unburstified.cmd.context
cmdContext.source := unburstified.cmd.source
cmdContext.isWrite := unburstified.cmd.isWrite
halt := !cmdContext.ready
val rspContext = cmdContext.queue(rdataFifoSize)
val rdataFifo = io.output.rdata.queueLowLatency(rdataFifoSize, latency = 1)
rdataFifo.ready := unburstified.rsp.fire && !rspContext.isWrite
rspContext.ready := unburstified.rsp.fire
unburstified.rsp.valid := rspContext.valid && (rspContext.isWrite || rdataFifo.valid)
unburstified.rsp.setSuccess()
unburstified.rsp.last := True
unburstified.rsp.source := rspContext.source
unburstified.rsp.context := rspContext.context
unburstified.rsp.data := rdataFifo.data
pendingRead := pendingRead + U(io.output.cmd.fire && !io.output.cmd.we) - U(rdataFifo.fire)
}
object BmbToLiteDramTester extends App{
import spinal.core.sim._
SimConfig.withWave.compile(BmbToLiteDram(
bmbParameter = BmbParameter(
addressWidth = 20,
dataWidth = 32,
lengthWidth = 6,
sourceWidth = 4,
contextWidth = 16
),
liteDramParameter = LiteDramNativeParameter(
addressWidth = 20,
dataWidth = 128
),
wdataFifoSize = 16,
rdataFifoSize = 16
)).doSimUntilVoid(seed = 42){dut =>
val tester = new BmbMemoryTester(dut.io.input, dut.clockDomain, rspCounterTarget = 3000)
dut.io.output.simSlave(tester.memory.memory, dut.clockDomain)
}
}
@ -120,15 +274,21 @@ case class VexRiscvLitexSmpCluster(p : VexRiscvLitexSmpClusterParameter,
cluster.io.timerInterrupts <> B(clint.harts.map(_.timerInterrupt))
cluster.io.softwareInterrupts <> B(clint.harts.map(_.softwareInterrupt))
val dBusDecoder = BmbDecoderOutOfOrder(
//TODO
// val dBusDecoder = BmbDecoderOutOfOrder(
// p = cluster.io.dMem.p,
// mappings = Seq(DefaultMapping, p.liteDramMapping),
// capabilities = Seq(cluster.io.dMem.p, cluster.io.dMem.p),
// pendingRspTransactionMax = 32
// )
val dBusDecoder = BmbDecoder(
p = cluster.io.dMem.p,
mappings = Seq(DefaultMapping, p.liteDramMapping),
capabilities = Seq(cluster.io.dMem.p, cluster.io.dMem.p),
pendingRspTransactionMax = 32
pendingMax = 31
)
dBusDecoder.io.input << cluster.io.dMem.pipelined(cmdValid = true, cmdReady = true, rspValid = true)
io.dMem.fromBmb(dBusDecoder.io.outputs(1))
val dMemBridge = io.dMem.fromBmb(dBusDecoder.io.outputs(1), wdataFifoSize = 32, rdataFifoSize = 32)
val iBusArbiterParameter = cluster.iBusParameter.copy(sourceWidth = log2Up(cpuCount))
val iBusArbiter = BmbArbiter(
@ -146,7 +306,7 @@ case class VexRiscvLitexSmpCluster(p : VexRiscvLitexSmpClusterParameter,
pendingMax = 15
)
iBusDecoder.io.input << iBusArbiter.io.output
io.iMem.fromBmb(iBusDecoder.io.outputs(1))
val iMemBridge = io.iMem.fromBmb(iBusDecoder.io.outputs(1), wdataFifoSize = 0, rdataFifoSize = 32)
val peripheralAccessLength = Math.max(iBusDecoder.io.outputs(0).p.lengthWidth, dBusDecoder.io.outputs(0).p.lengthWidth)
val peripheralArbiter = BmbArbiter(
@ -160,6 +320,7 @@ case class VexRiscvLitexSmpCluster(p : VexRiscvLitexSmpClusterParameter,
val peripheralWishbone = peripheralArbiter.io.output.toWishbone()
io.peripheral << peripheralWishbone
}
object VexRiscvLitexSmpClusterGen extends App {
val cpuCount = 4
val withStall = false
@ -183,7 +344,8 @@ object VexRiscvLitexSmpClusterGen extends App {
debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn"))
)
SpinalVerilog(Bench.compressIo(dutGen))
// SpinalVerilog(Bench.compressIo(dutGen))
SpinalVerilog(dutGen)
}
@ -194,7 +356,6 @@ object VexRiscvLitexSmpClusterOpenSbi extends App{
val simConfig = SimConfig
simConfig.withWave
simConfig.allOptimisation
simConfig.addSimulatorFlag("--threads 1")
val cpuCount = 4
val withStall = false
@ -204,46 +365,90 @@ object VexRiscvLitexSmpClusterOpenSbi extends App{
cpuConfigs = List.tabulate(cpuCount) { hartId =>
vexRiscvConfig(
hartId = hartId,
ioRange = address => address.msb,
resetVector = 0
ioRange = address => address(31 downto 28) === 0xF,
resetVector = 0x80000000l
)
}
),
liteDram = LiteDramNativeParameter(addressWidth = 32, dataWidth = 128),
liteDramMapping = SizeMapping(0x40000000l, 0x40000000l)
liteDramMapping = SizeMapping(0x80000000l, 0x70000000l)
)
def dutGen = VexRiscvLitexSmpCluster(
p = parameter,
debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn"))
)
def dutGen = {
val top = VexRiscvLitexSmpCluster(
p = parameter,
debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn"))
)
top.rework{
top.io.clint.setAsDirectionLess.allowDirectionLessIo
top.io.peripheral.setAsDirectionLess.allowDirectionLessIo.simPublic()
val hit = (top.io.peripheral.ADR <<2 >= 0xF0010000l && top.io.peripheral.ADR<<2 < 0xF0020000l)
top.io.clint.CYC := top.io.peripheral.CYC && hit
top.io.clint.STB := top.io.peripheral.STB
top.io.clint.WE := top.io.peripheral.WE
top.io.clint.ADR := top.io.peripheral.ADR.resized
top.io.clint.DAT_MOSI := top.io.peripheral.DAT_MOSI
top.io.peripheral.DAT_MISO := top.io.clint.DAT_MISO
top.io.peripheral.ACK := top.io.peripheral.CYC && (!hit || top.io.clint.ACK)
top.io.peripheral.ERR := False
top.dMemBridge.unburstified.cmd.simPublic()
}
top
}
simConfig.compile(dutGen).doSimUntilVoid(seed = 42){dut =>
// dut.clockDomain.forkSimSpeedPrinter(1.0)
// VexRiscvSmpClusterTestInfrastructure.init(dut)
// val ram = VexRiscvSmpClusterTestInfrastructure.ram(dut, withStall)
// ram.memory.loadBin(0x80000000l, "../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_payload.bin")
// ram.memory.loadBin(0x80000000l, "../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_jump.bin")
// ram.memory.loadBin(0xC0000000l, "../buildroot/output/images/Image")
// ram.memory.loadBin(0xC1000000l, "../buildroot/output/images/dtb")
// ram.memory.loadBin(0xC2000000l, "../buildroot/output/images/rootfs.cpio")
dut.clockDomain.forkStimulus(10)
fork {
dut.debugClockDomain.resetSim #= false
sleep (0)
dut.debugClockDomain.resetSim #= true
sleep (10)
dut.debugClockDomain.resetSim #= false
}
// fork{
// disableSimWave()
// val atMs = 130
// val durationMs = 15
// sleep(atMs*1000000)
// enableSimWave()
// println("** enableSimWave **")
// sleep(durationMs*1000000)
// println("** disableSimWave **")
// while(true) {
// disableSimWave()
// sleep(100000 * 10)
// enableSimWave()
// sleep( 100 * 10)
// }
//// simSuccess()
// }
val ram = SparseMemory()
ram.loadBin(0x80000000l, "../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_jump.bin")
ram.loadBin(0xC0000000l, "../buildroot/output/images/Image")
ram.loadBin(0xC1000000l, "../buildroot/output/images/dtb")
ram.loadBin(0xC2000000l, "../buildroot/output/images/rootfs.cpio")
dut.io.iMem.simSlave(ram, dut.clockDomain)
dut.io.dMem.simSlave(ram, dut.clockDomain, dut.dMemBridge.unburstified)
dut.io.externalInterrupts #= 0
dut.io.externalSupervisorInterrupts #= 0
dut.clockDomain.onSamplings{
if(dut.io.peripheral.CYC.toBoolean){
(dut.io.peripheral.ADR.toLong << 2) match {
case 0xF0000000l => print(dut.io.peripheral.DAT_MOSI.toLong.toChar)
case 0xF0000004l => dut.io.peripheral.DAT_MISO #= (if(System.in.available() != 0) System.in.read() else 0xFFFFFFFFl)
case _ =>
}
// println(f"${dut.io.peripheral.ADR.toLong}%x")
}
}
// fork{
// disableSimWave()
// val atMs = 8
// val durationMs = 3
// sleep(atMs*1000000)
// enableSimWave()
// println("** enableSimWave **")
// sleep(durationMs*1000000)
// println("** disableSimWave **")
// while(true) {
// disableSimWave()
// sleep(100000 * 10)
// enableSimWave()
// sleep( 100 * 10)
// }
// // simSuccess()
// }
fork{
while(true) {