diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala
index 0a8c3ec..38506f2 100644
--- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala
+++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala
@@ -54,8 +54,9 @@ case class VexRiscvSmpCluster(p : VexRiscvSmpClusterParameter,
     val core = new VexRiscv(cpuConfig)
     core.plugins.foreach {
       case plugin: IBusCachedPlugin => iBus = plugin.iBus.toBmb()
-      case plugin: DBusCachedPlugin => dBus = plugin.dBus.toBmb()
+      case plugin: DBusCachedPlugin => dBus = plugin.dBus.toBmb().pipelined(cmdValid = true)
       case plugin: CsrPlugin => {
+        plugin.externalMhartId := cpuId
         plugin.softwareInterrupt := io.softwareInterrupts(cpuId)
         plugin.externalInterrupt := io.externalInterrupts(cpuId)
         plugin.timerInterrupt := io.timerInterrupts(cpuId)
@@ -112,9 +113,12 @@ case class VexRiscvSmpCluster(p : VexRiscvSmpClusterParameter,
 
 
 object VexRiscvSmpClusterGen {
-  def vexRiscvConfig(hartId : Int,
+  def vexRiscvConfig(hartIdWidth : Int,
+                     hartId : Int,
                      ioRange : UInt => Bool = (x => x(31 downto 28) === 0xF),
                      resetVector : Long = 0x80000000l) = {
+    val iBusWidth = 128
+    val dBusWidth = 64
     val config = VexRiscvConfig(
       plugins = List(
         new MmuPlugin(
@@ -135,7 +139,7 @@ object VexRiscvSmpClusterGen {
             wayCount = 2,
             addressWidth = 32,
             cpuDataWidth = 32,
-            memDataWidth = 128,
+            memDataWidth = iBusWidth,
             catchIllegalAccess = true,
             catchAccessFault = true,
             asyncTagMemory = false,
@@ -151,7 +155,7 @@ object VexRiscvSmpClusterGen {
           )
         ),
         new DBusCachedPlugin(
-          dBusCmdMasterPipe = true,
+          dBusCmdMasterPipe = dBusWidth == 32,
           dBusCmdSlavePipe = true,
           dBusRspSlavePipe = true,
           relaxedMemoryTranslationRegister = true,
@@ -161,14 +165,15 @@ object VexRiscvSmpClusterGen {
             wayCount = 1,
             addressWidth = 32,
             cpuDataWidth = 32,
-            memDataWidth = 32,
+            memDataWidth = dBusWidth,
             catchAccessError = true,
             catchIllegal = true,
             catchUnaligned = true,
             withLrSc = true,
             withAmo = true,
             withExclusive = true,
-            withInvalidate = true
+            withInvalidate = true,
+            aggregationWidth = if(dBusWidth == 32) 0 else log2Up(dBusWidth/8) //
            )
          ),
          memoryTranslatorPortConfig = MmuPortConfig(
@@ -208,7 +213,7 @@ object VexRiscvSmpClusterGen {
           mulUnrollFactor = 32,
           divUnrollFactor = 1
         ),
-        new CsrPlugin(CsrPluginConfig.openSbi(hartId = hartId, misa = Riscv.misaToInt("imas"))),
+        new CsrPlugin(CsrPluginConfig.openSbi(misa = Riscv.misaToInt("imas")).copy(withExternalMhartid = true, mhartidWidth = hartIdWidth)),
         new BranchPlugin(
           earlyBranch = false,
           catchAddressMisaligned = true,
@@ -224,7 +229,7 @@ object VexRiscvSmpClusterGen {
       debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")),
       p = VexRiscvSmpClusterParameter(
         cpuConfigs = List.tabulate(cpuCount) {
-          vexRiscvConfig(_, resetVector = resetVector)
+          vexRiscvConfig(log2Up(cpuCount), _, resetVector = resetVector)
         }
       )
     )
@@ -462,7 +467,7 @@ object VexRiscvSmpClusterOpenSbi extends App{
   simConfig.allOptimisation
   simConfig.addSimulatorFlag("--threads 1")
 
-  val cpuCount = 1
+  val cpuCount = 4
   val withStall = false
 
   def gen = {
@@ -573,8 +578,8 @@ object VexRiscvSmpClusterOpenSbi extends App{
 
 //    fork{
 //      disableSimWave()
-//      val atMs = 130
-//      val durationMs = 15
+//      val atMs = 3790
+//      val durationMs = 5
 //      sleep(atMs*1000000)
 //      enableSimWave()
 //      println("** enableSimWave **")
diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala
index 03ccc89..3f3047f 100644
--- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala
+++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala
@@ -73,13 +73,13 @@ case class LiteDramNative(p : LiteDramNativeParameter) extends Bundle with IMast
     }
 
     var writeCmdCounter, writeDataCounter = 0
-    StreamReadyRandomizer(bus.cmd, cd)
+    StreamReadyRandomizer(bus.cmd, cd).factor = 0.5f
     StreamMonitor(bus.cmd, cd) { t =>
      cmdQueue.enqueue(Cmd(t.addr.toLong * (p.dataWidth/8) , t.we.toBoolean))
      if(t.we.toBoolean) writeCmdCounter += 1
     }
-    StreamReadyRandomizer(bus.wdata, cd)
+    StreamReadyRandomizer(bus.wdata, cd).factor = 0.5f
     StreamMonitor(bus.wdata, cd) { p =>
       writeDataCounter += 1
 //      if(p.data.toBigInt == BigInt("00000002000000020000000200000002",16)){
@@ -175,16 +175,19 @@ case class BmbToLiteDram(bmbParameter : BmbParameter,
   val halt = Bool()
   val (cmdFork, dataFork) = StreamFork2(unburstified.cmd.haltWhen(halt))
 
-  io.output.cmd.arbitrationFrom(cmdFork.haltWhen(pendingRead.msb))
-  io.output.cmd.addr := (cmdFork.address >> log2Up(liteDramParameter.dataWidth/8)).resized
-  io.output.cmd.we := cmdFork.isWrite
+  val outputCmd = Stream(LiteDramNativeCmd(liteDramParameter))
+  outputCmd.arbitrationFrom(cmdFork.haltWhen(pendingRead.msb))
+  outputCmd.addr := (cmdFork.address >> log2Up(liteDramParameter.dataWidth/8)).resized
+  outputCmd.we := cmdFork.isWrite
+
+  io.output.cmd <-< outputCmd
 
   if(bmbParameter.canWrite) {
     val wData = Stream(LiteDramNativeWData(liteDramParameter))
     wData.arbitrationFrom(dataFork.throwWhen(dataFork.isRead))
     wData.data := dataFork.data
     wData.we := dataFork.mask
-    io.output.wdata << wData.queue(wdataFifoSize)
+    io.output.wdata << wData.queueLowLatency(wdataFifoSize, latency = 1) //TODO queue low latency
   } else {
     dataFork.ready := True
     io.output.wdata.valid := False
@@ -212,7 +215,7 @@ case class BmbToLiteDram(bmbParameter : BmbParameter,
   unburstified.rsp.data := rdataFifo.data
 
 
-  pendingRead := pendingRead + U(io.output.cmd.fire && !io.output.cmd.we) - U(rdataFifo.fire)
+  pendingRead := pendingRead + U(outputCmd.fire && !outputCmd.we) - U(rdataFifo.fire)
 }
 
 object BmbToLiteDramTester extends App{
@@ -241,6 +244,7 @@ case class VexRiscvLitexSmpClusterParameter( cluster : VexRiscvSmpClusterParamet
                                              liteDram : LiteDramNativeParameter,
                                              liteDramMapping : AddressMapping)
 
+//addAttribute("""mark_debug = "true"""")
 case class VexRiscvLitexSmpCluster(p : VexRiscvLitexSmpClusterParameter,
                                    debugClockDomain : ClockDomain) extends Component{
 
@@ -308,50 +312,59 @@ case class VexRiscvLitexSmpCluster(p : VexRiscvLitexSmpClusterParameter,
   iBusDecoder.io.input << iBusArbiter.io.output.pipelined(cmdValid = true)
 
   val iMem = LiteDramNative(p.liteDram)
-  val iMemBridge = iMem.fromBmb(iBusDecoder.io.outputs(1), wdataFifoSize = 0, rdataFifoSize = 32)
-  iMem.cmd >-> io.iMem.cmd
-  iMem.wdata >> io.iMem.wdata
-  iMem.rdata << io.iMem.rdata
+  io.iMem.fromBmb(iBusDecoder.io.outputs(1), wdataFifoSize = 0, rdataFifoSize = 32)
+
+  val iBusDecoderToPeripheral = iBusDecoder.io.outputs(0).resize(dataWidth = 32).pipelined(cmdHalfRate = true, rspValid = true)
+  val dBusDecoderToPeripheral = dBusDecoder.io.outputs(0).resize(dataWidth = 32).pipelined(cmdHalfRate = true, rspValid = true)
+
+  val peripheralAccessLength = Math.max(iBusDecoder.io.outputs(0).p.lengthWidth, dBusDecoder.io.outputs(0).p.lengthWidth)
   val peripheralArbiter = BmbArbiter(
-    p = dBusDecoder.io.outputs(0).p.copy(sourceWidth = dBusDecoder.io.outputs(0).p.sourceWidth + 1, lengthWidth = peripheralAccessLength),
+    p = dBusDecoder.io.outputs(0).p.copy(
+      sourceWidth = List(iBusDecoderToPeripheral, dBusDecoderToPeripheral).map(_.p.sourceWidth).max + 1,
+      contextWidth = List(iBusDecoderToPeripheral, dBusDecoderToPeripheral).map(_.p.contextWidth).max,
+      lengthWidth = peripheralAccessLength,
+      dataWidth = 32
+    ),
     portCount = 2,
     lowerFirstPriority = true
   )
-  peripheralArbiter.io.inputs(0) << iBusDecoder.io.outputs(0).resize(dataWidth = 32).pipelined(cmdHalfRate = true, rspValid = true)
-  peripheralArbiter.io.inputs(1) << dBusDecoder.io.outputs(0).resize(dataWidth = 32).pipelined(cmdHalfRate = true, rspValid = true)
+  peripheralArbiter.io.inputs(0) << iBusDecoderToPeripheral
+  peripheralArbiter.io.inputs(1) << dBusDecoderToPeripheral
 
   val peripheralWishbone = peripheralArbiter.io.output.pipelined(cmdValid = true).toWishbone()
   io.peripheral << peripheralWishbone
 }
 
 object VexRiscvLitexSmpClusterGen extends App {
-  val cpuCount = 4
+  for(cpuCount <- List(1,2,4,8)) {
+    def parameter = VexRiscvLitexSmpClusterParameter(
+      cluster = VexRiscvSmpClusterParameter(
+        cpuConfigs = List.tabulate(cpuCount) { hartId =>
+          vexRiscvConfig(
+            hartIdWidth = log2Up(cpuCount),
+            hartId = hartId,
+            ioRange = address => address.msb,
+            resetVector = 0
+          )
+        }
+      ),
+      liteDram = LiteDramNativeParameter(addressWidth = 32, dataWidth = 128),
+      liteDramMapping = SizeMapping(0x40000000l, 0x40000000l)
+    )
-  def parameter = VexRiscvLitexSmpClusterParameter(
-    cluster = VexRiscvSmpClusterParameter(
-      cpuConfigs = List.tabulate(cpuCount) { hartId =>
-        vexRiscvConfig(
-          hartId = hartId,
-          ioRange = address => address.msb,
-          resetVector = 0
-        )
-      }
-    ),
-    liteDram = LiteDramNativeParameter(addressWidth = 32, dataWidth = 128),
-    liteDramMapping = SizeMapping(0x40000000l, 0x40000000l)
-  )
+    def dutGen = {
+      val toplevel = VexRiscvLitexSmpCluster(
+        p = parameter,
+        debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn"))
+      )
+      toplevel
+    }
-  def dutGen = VexRiscvLitexSmpCluster(
-    p = parameter,
-    debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn"))
-  )
-
-  val genConfig = SpinalConfig().addStandardMemBlackboxing(blackboxByteEnables)
-//  genConfig.generateVerilog(Bench.compressIo(dutGen))
-  genConfig.generateVerilog(dutGen)
+    val genConfig = SpinalConfig().addStandardMemBlackboxing(blackboxByteEnables)
+    // genConfig.generateVerilog(Bench.compressIo(dutGen))
+    genConfig.generateVerilog(dutGen.setDefinitionName(s"VexRiscvLitexSmpCluster_${cpuCount}c"))
+  }
 }
@@ -363,13 +376,13 @@ object VexRiscvLitexSmpClusterOpenSbi extends App{
   simConfig.withWave
   simConfig.allOptimisation
 
-  val cpuCount = 4
-  val withStall = false
+  val cpuCount = 8
 
   def parameter = VexRiscvLitexSmpClusterParameter(
     cluster = VexRiscvSmpClusterParameter(
       cpuConfigs = List.tabulate(cpuCount) { hartId =>
        vexRiscvConfig(
+          hartIdWidth = log2Up(cpuCount),
          hartId = hartId,
          ioRange = address => address(31 downto 28) === 0xF,
          resetVector = 0x80000000l
@@ -440,12 +453,12 @@ object VexRiscvLitexSmpClusterOpenSbi extends App{
 
 //  fork{
 //    disableSimWave()
-//    val atMs = 8
-//    val durationMs = 3
-//    sleep(atMs*1000000)
+//    val atMs = 3790
+//    val durationMs = 5
+//    sleep(atMs*1000000l)
 //    enableSimWave()
 //    println("** enableSimWave **")
-//    sleep(durationMs*1000000)
+//    sleep(durationMs*1000000l)
 //    println("** disableSimWave **")
 //    while(true) {
 //      disableSimWave()
@@ -453,7 +466,7 @@ object VexRiscvLitexSmpClusterOpenSbi extends App{
 //      enableSimWave()
 //      sleep( 100 * 10)
 //    }
-//    //    simSuccess()
+//  //    simSuccess()
 //  }
 
   fork{
diff --git a/src/main/scala/vexriscv/ip/DataCache.scala b/src/main/scala/vexriscv/ip/DataCache.scala
index 2f2e8c2..82fa3af 100644
--- a/src/main/scala/vexriscv/ip/DataCache.scala
+++ b/src/main/scala/vexriscv/ip/DataCache.scala
@@ -5,7 +5,7 @@ import spinal.core._
 import spinal.lib._
 import spinal.lib.bus.amba4.axi.{Axi4Config, Axi4Shared}
 import spinal.lib.bus.avalon.{AvalonMM, AvalonMMConfig}
-import spinal.lib.bus.bmb.{Bmb, BmbParameter}
+import spinal.lib.bus.bmb.{Bmb, BmbCmd, BmbParameter}
 import spinal.lib.bus.wishbone.{Wishbone, WishboneConfig}
 import spinal.lib.bus.simple._
 import vexriscv.plugin.DBusSimpleBus
@@ -29,7 +29,8 @@ case class DataCacheConfig(cacheSize : Int,
                            withInvalidate : Boolean = false,
                            pendingMax : Int = 32,
                            directTlbHit : Boolean = false,
-                           mergeExecuteMemory : Boolean = false){
+                           mergeExecuteMemory : Boolean = false,
+                           aggregationWidth : Int = 0){
   assert(!(mergeExecuteMemory && (earlyDataMux || earlyWaysHits)))
   assert(!(earlyDataMux && !earlyWaysHits))
   assert(isPow2(pendingMax))
@@ -41,6 +42,8 @@ case class DataCacheConfig(cacheSize : Int,
   def withInternalLrSc = withLrSc && !withExclusive
   def withExternalLrSc = withLrSc && withExclusive
   def withExternalAmo = withAmo && withExclusive
+  def cpuDataBytes = cpuDataWidth/8
+  def memDataBytes = memDataWidth/8
   def getAxi4SharedConfig() = Axi4Config(
     addressWidth = addressWidth,
     dataWidth = memDataWidth,
@@ -79,10 +82,10 @@ case class DataCacheConfig(cacheSize : Int,
 
   def getBmbParameter() = BmbParameter(
     addressWidth = 32,
-    dataWidth = 32,
+    dataWidth = memDataWidth,
     lengthWidth = log2Up(this.bytePerLine),
     sourceWidth = 0,
-    contextWidth = if(!withWriteResponse) 1 else 0,
+    contextWidth = (if(!withWriteResponse) 1 else 0) + (if(cpuDataWidth != memDataWidth) log2Up(memDataBytes) else 0),
     canRead = true,
     canWrite = true,
     alignment = BmbParameter.BurstAlignement.LENGTH,
@@ -203,6 +206,7 @@ case class DataCacheMemCmd(p : DataCacheConfig) extends Bundle{
   val last = Bool
 }
 case class DataCacheMemRsp(p : DataCacheConfig) extends Bundle{
+  val aggregated = UInt(p.aggregationWidth bits)
   val last = Bool()
   val data = Bits(p.memDataWidth bit)
   val error = Bool
@@ -217,7 +221,7 @@ case class DataCacheAck(p : DataCacheConfig) extends Bundle{
 }
 
 case class DataCacheSync(p : DataCacheConfig) extends Bundle{
-
+  val aggregated = UInt(p.aggregationWidth bits)
 }
 
 case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave{
@@ -369,21 +373,133 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave
   }
 
-  def toBmb() : Bmb = {
+  def toBmb(syncPendingMax : Int = 16,
+            timeoutCycles : Int = 16) : Bmb = new Area{
+    setCompositeName(DataCacheMemBus.this, "Bridge", true)
     val pipelinedMemoryBusConfig = p.getBmbParameter()
     val bus = Bmb(pipelinedMemoryBusConfig).setCompositeName(this,"toBmb", true)
+    val aggregationMax = p.memDataBytes
 
-    bus.cmd.valid := cmd.valid
-    bus.cmd.last := cmd.last
-    if(!p.withWriteResponse) bus.cmd.context(0) := cmd.wr
-    bus.cmd.opcode := (cmd.wr ? B(Bmb.Cmd.Opcode.WRITE) | B(Bmb.Cmd.Opcode.READ))
-    bus.cmd.address := cmd.address.resized
-    bus.cmd.data := cmd.data
-    bus.cmd.length := (cmd.length << 2) | 3 //TODO better sub word access
-    bus.cmd.mask := cmd.mask
-    if(p.withExclusive) bus.cmd.exclusive := cmd.exclusive
+    case class Context() extends Bundle{
+      val isWrite = !p.withWriteResponse generate Bool()
+      val rspCount = (p.cpuDataWidth != p.memDataWidth) generate UInt(log2Up(aggregationMax) bits)
+    }
+
+    val withoutWriteBuffer = if(p.cpuDataWidth == p.memDataWidth) new Area {
+      val busCmdContext = Context()
+
+      bus.cmd.valid := cmd.valid
+      bus.cmd.last := cmd.last
+      bus.cmd.opcode := (cmd.wr ? B(Bmb.Cmd.Opcode.WRITE) | B(Bmb.Cmd.Opcode.READ))
+      bus.cmd.address := cmd.address.resized
+      bus.cmd.data := cmd.data
+      bus.cmd.length := (cmd.length << 2) | 3
+      bus.cmd.mask := cmd.mask
+      if (p.withExclusive) bus.cmd.exclusive := cmd.exclusive
+      if (!p.withWriteResponse) busCmdContext.isWrite := cmd.wr
+      bus.cmd.context := B(busCmdContext)
+
+      cmd.ready := bus.cmd.ready
+      if(p.withInvalidate) sync.arbitrationFrom(bus.sync)
+    }
+
+    val withWriteBuffer = if(p.cpuDataWidth != p.memDataWidth) new Area {
+      val buffer = new Area {
+        val stream = cmd.toEvent().m2sPipe()
+        val address = Reg(UInt(p.addressWidth bits))
+        val length = Reg(UInt(pipelinedMemoryBusConfig.lengthWidth bits))
+        val write = Reg(Bool)
+        val exclusive = Reg(Bool)
+        val data = Reg(Bits(p.memDataWidth bits))
+        val mask = Reg(Bits(p.memDataWidth/8 bits)) init(0)
+      }
+
+      val aggregationRange = log2Up(p.memDataWidth/8)-1 downto log2Up(p.cpuDataWidth/8)
+      val tagRange = p.addressWidth-1 downto aggregationRange.high+1
+      val aggregationEnabled = Reg(Bool)
+      val aggregationCounter = Reg(UInt(log2Up(aggregationMax) bits)) init(0)
+      val aggregationCounterFull = aggregationCounter === aggregationCounter.maxValue
+      val timer = Reg(UInt(log2Up(timeoutCycles)+1 bits)) init(0)
+      val timerFull = timer.msb
+      val hit = cmd.address(tagRange) === buffer.address(tagRange)
+      val canAggregate = cmd.valid && cmd.wr && !cmd.uncached && !cmd.exclusive && !timerFull && !aggregationCounterFull && (!buffer.stream.valid || aggregationEnabled && hit)
+      val doFlush = cmd.valid && !canAggregate || timerFull || aggregationCounterFull || !aggregationEnabled
+//      val canAggregate = False
+//      val doFlush = True
+      val busCmdContext = Context()
+      val halt = False
+
+      when(cmd.fire){
+        aggregationCounter := aggregationCounter + 1
+      }
+      when(buffer.stream.valid && !timerFull){
+        timer := timer + 1
+      }
+      when(bus.cmd.fire || !buffer.stream.valid){
+        buffer.mask := 0
+        aggregationCounter := 0
+        timer := 0
+      }
+
+      buffer.stream.ready := (bus.cmd.ready && doFlush || canAggregate) && !halt
+      bus.cmd.valid := buffer.stream.valid && doFlush && !halt
+      bus.cmd.last := True
+      bus.cmd.opcode := (buffer.write ? B(Bmb.Cmd.Opcode.WRITE) | B(Bmb.Cmd.Opcode.READ))
+      bus.cmd.address := buffer.address
+      bus.cmd.length := buffer.length
+      bus.cmd.data := buffer.data
+      bus.cmd.mask := buffer.mask
+
+      if (p.withExclusive) bus.cmd.exclusive := buffer.exclusive
+      bus.cmd.context.removeAssignments() := B(busCmdContext)
+      if (!p.withWriteResponse) busCmdContext.isWrite := bus.cmd.isWrite
+      busCmdContext.rspCount := aggregationCounter
+
+      val aggregationSel = cmd.address(aggregationRange)
+      when(cmd.fire){
+        val dIn = cmd.data.subdivideIn(8 bits)
+        val dReg = buffer.data.subdivideIn(8 bits)
+        for(byteId <- 0 until p.memDataBytes){
+          when(aggregationSel === byteId / p.cpuDataBytes && cmd.mask(byteId % p.cpuDataBytes)){
+            dReg.write(byteId, dIn(byteId % p.cpuDataBytes))
+            buffer.mask(byteId) := True
+          }
+        }
+      }
+
+      when(cmd.fire){
+        buffer.write := cmd.wr
+        buffer.address := cmd.address.resized
+        buffer.length := (cmd.length << 2) | 3
+        if (p.withExclusive) buffer.exclusive := cmd.exclusive
+
+        when(cmd.wr && !cmd.uncached && !cmd.exclusive){
+          aggregationEnabled := True
+          buffer.address(aggregationRange.high downto 0) := 0
+          buffer.length := p.memDataBytes-1
+        } otherwise {
+          aggregationEnabled := False
+        }
+      }
+
+
+      val rspCtx = bus.rsp.context.as(Context())
+      rsp.aggregated := rspCtx.rspCount
+
+      val syncLogic = p.withInvalidate generate new Area{
+        val cmdCtx = Stream(UInt(log2Up(aggregationMax) bits))
+        cmdCtx.valid := bus.cmd.fire && bus.cmd.isWrite
+        cmdCtx.payload := aggregationCounter
+        halt setWhen(!cmdCtx.ready)
+
+        val syncCtx = cmdCtx.queueLowLatency(syncPendingMax, latency = 1)
+        syncCtx.ready := bus.sync.fire
+
+        sync.arbitrationFrom(bus.sync)
+        sync.aggregated := syncCtx.payload
+      }
+    }
 
-    cmd.ready := bus.cmd.ready
     rsp.valid := bus.rsp.valid
     if(!p.withWriteResponse) rsp.valid clearWhen(bus.rsp.context(0))
@@ -399,21 +515,9 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave
       inv.enable := bus.inv.all
 
       bus.ack.arbitrationFrom(ack)
-
-      sync.arbitrationFrom(bus.sync)
-
-//      bus.ack.arbitrationFrom(ack)
-//      //TODO manage lenght ?
-//      inv.address := bus.inv.address
-////      inv.opcode := bus.inv.opcode
-//      ???
-//
-//      bus.ack.arbitrationFrom(ack)
+      // //TODO manage lenght ?
     }
-
-
-    bus
-  }
+  }.bus
 }
@@ -537,7 +641,7 @@ class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParam
   val memCmdSent = RegInit(False) setWhen (io.mem.cmd.ready) clearWhen (!io.cpu.writeBack.isStuck)
 
   val pending = withExclusive generate new Area{
     val counter = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0)
-    val counterNext = counter + U(io.mem.cmd.fire && io.mem.cmd.last) - U(io.mem.rsp.valid && io.mem.rsp.last)
+    val counterNext = counter + U(io.mem.cmd.fire && io.mem.cmd.last) - ((io.mem.rsp.valid && io.mem.rsp.last) ? (io.mem.rsp.aggregated +^ 1) | 0)
     counter := counterNext
 
     val done = RegNext(counterNext === 0)
@@ -554,7 +658,7 @@ class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParam
 
   val sync = withInvalidate generate new Area{
     io.mem.sync.ready := True
-
+    val syncCount = io.mem.sync.aggregated +^ 1
     val syncContext = new Area{
       val history = Mem(Bool, pendingMax)
       val wPtr, rPtr = Reg(UInt(log2Up(pendingMax)+1 bits)) init(0)
@@ -564,7 +668,7 @@ class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParam
       }
 
      when(io.mem.sync.fire){
-        rPtr := rPtr + 1
+        rPtr := rPtr + syncCount
      }
      val uncached = history.readAsync(rPtr.resized)
      val full = RegNext(wPtr - rPtr >= pendingMax-1)
@@ -573,7 +677,7 @@ class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParam
 
    def pending(inc : Bool, dec : Bool) = new Area {
      val pendingSync = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0)
-      val pendingSyncNext = pendingSync + U(io.mem.cmd.fire && io.mem.cmd.wr && inc) - U(io.mem.sync.fire && dec)
+      val pendingSyncNext = pendingSync + U(io.mem.cmd.fire && io.mem.cmd.wr && inc) - ((io.mem.sync.fire && dec) ? syncCount | 0)
      pendingSync := pendingSyncNext
    }
 
@@ -582,7 +686,7 @@ class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParam
 
    def track(load : Bool, uncached : Boolean) = new Area {
      val counter = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0)
-      counter := counter - U(io.mem.sync.fire && counter =/= 0 && (if(uncached) syncContext.uncached else !syncContext.uncached))
+      counter := counter - ((io.mem.sync.fire && counter =/= 0 && (if(uncached) syncContext.uncached else !syncContext.uncached)) ? syncCount | 0)
      when(load){ counter := (if(uncached) writeUncached.pendingSyncNext else writeCached.pendingSyncNext) }
 
      val busy = counter =/= 0
diff --git a/src/main/scala/vexriscv/plugin/CsrPlugin.scala b/src/main/scala/vexriscv/plugin/CsrPlugin.scala
index 43dbfaf..23f3323 100644
--- a/src/main/scala/vexriscv/plugin/CsrPlugin.scala
+++ b/src/main/scala/vexriscv/plugin/CsrPlugin.scala
@@ -39,7 +39,7 @@ case class CsrPluginConfig(
                             marchid : BigInt,
                             mimpid : BigInt,
                             mhartid : BigInt,
-                            misaExtensionsInit : Int,
+                            misaExtensionsInit : Int,
                             misaAccess : CsrAccess,
                             mtvecAccess : CsrAccess,
                             mtvecInit : BigInt,
@@ -68,6 +68,8 @@ case class CsrPluginConfig(
                             satpAccess : CsrAccess = CsrAccess.NONE,
                             medelegAccess : CsrAccess = CsrAccess.NONE,
                             midelegAccess : CsrAccess = CsrAccess.NONE,
+                            withExternalMhartid : Boolean = false,
+                            mhartidWidth : Int = 0,
                             pipelineCsrRead : Boolean = false,
                             pipelinedInterrupt : Boolean = true,
                             csrOhDecoder : Boolean = true,
@@ -85,12 +87,12 @@ object CsrPluginConfig{
   def small : CsrPluginConfig = small(0x00000020l)
   def smallest : CsrPluginConfig = smallest(0x00000020l)
 
-  def openSbi(hartId : Int, misa : Int) = CsrPluginConfig(
+  def openSbi(misa : Int) = CsrPluginConfig(
     catchIllegalAccess = true,
     mvendorid = 0,
     marchid = 0,
     mimpid = 0,
-    mhartid = hartId,
+    mhartid = 0,
     misaExtensionsInit = misa,
     misaAccess = CsrAccess.READ_ONLY,
     mtvecAccess = CsrAccess.READ_WRITE, //Could have been WRITE_ONLY :(
@@ -387,6 +389,7 @@ class CsrPlugin(val config: CsrPluginConfig) extends Plugin[VexRiscv] with Excep
   var contextSwitching : Bool = null
   var thirdPartyWake : Bool = null
   var inWfi : Bool = null
+  var externalMhartId : UInt = null
 
   override def askWake(): Unit = thirdPartyWake := True
@@ -515,6 +518,8 @@ class CsrPlugin(val config: CsrPluginConfig) extends Plugin[VexRiscv] with Excep
 
     pipeline.update(MPP, UInt(2 bits))
 
+
+    if(withExternalMhartid) externalMhartId = in UInt(mhartidWidth bits)
   }
 
   def inhibateInterrupts() : Unit = allowInterrupts := False
@@ -600,7 +605,8 @@ class CsrPlugin(val config: CsrPluginConfig) extends Plugin[VexRiscv] with Excep
       if(mvendorid != null) READ_ONLY(CSR.MVENDORID, U(mvendorid))
       if(marchid   != null) READ_ONLY(CSR.MARCHID  , U(marchid  ))
       if(mimpid    != null) READ_ONLY(CSR.MIMPID   , U(mimpid   ))
-      if(mhartid   != null) READ_ONLY(CSR.MHARTID  , U(mhartid  ))
+      if(mhartid   != null && !withExternalMhartid) READ_ONLY(CSR.MHARTID  , U(mhartid  ))
+      if(withExternalMhartid) READ_ONLY(CSR.MHARTID  , externalMhartId)
       misaAccess(CSR.MISA, xlen-2 -> misa.base , 0 -> misa.extensions)
 
       //Machine CSR
diff --git a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala
index f133616..0b580d8 100644
--- a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala
+++ b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala
@@ -195,6 +195,7 @@ class DBusCachedPlugin(val config : DataCacheConfig,
         rsp.exclusive := RegNext(dBus.rsp.exclusive)
         rsp.error := RegNext(dBus.rsp.error)
         rsp.last := RegNext(dBus.rsp.last)
+        rsp.aggregated := RegNext(dBus.rsp.aggregated)
         rsp.data := RegNextWhen(dBus.rsp.data, dBus.rsp.valid && !cache.io.cpu.writeBack.keepMemRspData)
         rsp
       }