diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala index b8f5f82..7fb693d 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -22,7 +22,7 @@ import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import spinal.lib.generator._ -case class VexRiscvSmpClusterParameter( cpuConfigs : Seq[VexRiscvConfig]) +case class VexRiscvSmpClusterParameter(cpuConfigs : Seq[VexRiscvConfig], withExclusiveAndInvalidation : Boolean) class VexRiscvSmpClusterBase(p : VexRiscvSmpClusterParameter) extends Generator{ val cpuCount = p.cpuConfigs.size @@ -44,16 +44,28 @@ class VexRiscvSmpClusterBase(p : VexRiscvSmpClusterParameter) extends Generator{ val debugPort = debugBridge.produceIo(debugBridge.logic.jtagBridge.io.ctrl) - val exclusiveMonitor = BmbExclusiveMonitorGenerator() - val invalidationMonitor = BmbInvalidateMonitorGenerator() - interconnect.addConnection(exclusiveMonitor.output, invalidationMonitor.input) - interconnect.masters(invalidationMonitor.output).withOutOfOrderDecoder() + val dBusCoherent = BmbBridgeGenerator() + val dBusNonCoherent = BmbBridgeGenerator() + + val smp = p.withExclusiveAndInvalidation generate new Area{ + val exclusiveMonitor = BmbExclusiveMonitorGenerator() + interconnect.addConnection(dBusCoherent.bmb, exclusiveMonitor.input) + + val invalidationMonitor = BmbInvalidateMonitorGenerator() + interconnect.addConnection(exclusiveMonitor.output, invalidationMonitor.input) + interconnect.addConnection(invalidationMonitor.output, dBusNonCoherent.bmb) + interconnect.masters(invalidationMonitor.output).withOutOfOrderDecoder() + } + + val noSmp = !p.withExclusiveAndInvalidation generate new Area{ + interconnect.addConnection(dBusCoherent.bmb, dBusNonCoherent.bmb) + } val cores = for(cpuId <- 0 until cpuCount) yield new Area{ val cpu = VexRiscvBmbGenerator() cpu.config.load(p.cpuConfigs(cpuId)) interconnect.addConnection( - cpu.dBus -> List(exclusiveMonitor.input) + cpu.dBus -> List(dBusCoherent.bmb) ) cpu.enableDebugBmb( debugCd = debugCd, @@ -113,7 +125,8 @@ object VexRiscvSmpClusterGen { ioRange : UInt => Bool = (x => x(31 downto 28) === 0xF), resetVector : Long = 0x80000000l, iBusWidth : Int = 128, - dBusWidth : Int = 64) = { + dBusWidth : Int = 64, + coherency : Boolean = true) = { val config = VexRiscvConfig( plugins = List( @@ -167,8 +180,8 @@ object VexRiscvSmpClusterGen { catchUnaligned = true, withLrSc = true, withAmo = true, - withExclusive = true, - withInvalidate = true, + withExclusive = coherency, + withInvalidate = coherency, aggregationWidth = if(dBusWidth == 32) 0 else log2Up(dBusWidth/8) // ) ), diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala index ea8c95b..43bcc38 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala @@ -22,11 +22,11 @@ class VexRiscvLitexSmpCluster(p : VexRiscvLitexSmpClusterParameter) extends VexR for(core <- cores) interconnect.addConnection(core.cpu.iBus -> List(iArbiter.bmb)) interconnect.addConnection( - iArbiter.bmb -> List(iBridge.bmb, peripheralBridge.bmb), - invalidationMonitor.output -> List(dBridge.bmb, peripheralBridge.bmb) + iArbiter.bmb -> List(iBridge.bmb, peripheralBridge.bmb), + dBusNonCoherent.bmb -> List(dBridge.bmb, peripheralBridge.bmb) ) - if(p.coherentDma || p.cluster.cpuConfigs.size > 1) interconnect.masters(invalidationMonitor.output).withOutOfOrderDecoder() + if(p.cluster.withExclusiveAndInvalidation) interconnect.masters(dBusNonCoherent.bmb).withOutOfOrderDecoder() dBridge.liteDramParameter.load(p.liteDram) iBridge.liteDramParameter.load(p.liteDram) @@ -41,7 +41,7 @@ class VexRiscvLitexSmpCluster(p : VexRiscvLitexSmpClusterParameter) extends VexR dataWidth = p.cluster.cpuConfigs.head.find(classOf[DBusCachedPlugin]).get.config.memDataWidth, useSTALL = true )) - interconnect.addConnection(bridge.bmb, exclusiveMonitor.input) + interconnect.addConnection(bridge.bmb, dBusCoherent.bmb) } // Interconnect pipelining (FMax) @@ -50,7 +50,7 @@ class VexRiscvLitexSmpCluster(p : VexRiscvLitexSmpClusterParameter) extends VexR interconnect.setPipelining(core.cpu.iBus)(cmdHalfRate = true, rspValid = true) interconnect.setPipelining(iArbiter.bmb)(cmdHalfRate = true, rspValid = true) } - interconnect.setPipelining(invalidationMonitor.output)(cmdValid = true, cmdReady = true, rspValid = true) + interconnect.setPipelining(dBusNonCoherent.bmb)(cmdValid = true, cmdReady = true, rspValid = true) interconnect.setPipelining(peripheralBridge.bmb)(cmdHalfRate = true, rspValid = true) } @@ -74,6 +74,7 @@ object VexRiscvLitexSmpClusterCmdGen extends App { opt[String]("netlist-name") action { (v, c) => netlistName = v } }.parse(args)) + val coherency = coherentDma || cpuCount > 1 def parameter = VexRiscvLitexSmpClusterParameter( cluster = VexRiscvSmpClusterParameter( cpuConfigs = List.tabulate(cpuCount) { hartId => @@ -82,9 +83,11 @@ object VexRiscvLitexSmpClusterCmdGen extends App { ioRange = address => address.msb, resetVector = 0, iBusWidth = iBusWidth, - dBusWidth = dBusWidth + dBusWidth = dBusWidth, + coherency = coherency ) - } + }, + withExclusiveAndInvalidation = coherency ), liteDram = LiteDramNativeParameter(addressWidth = 32, dataWidth = liteDramWidth), liteDramMapping = SizeMapping(0x40000000l, 0x40000000l), @@ -114,7 +117,8 @@ object VexRiscvLitexSmpClusterGen extends App { ioRange = address => address.msb, resetVector = 0 ) - } + }, + withExclusiveAndInvalidation = true ), liteDram = LiteDramNativeParameter(addressWidth = 32, dataWidth = 128), liteDramMapping = SizeMapping(0x40000000l, 0x40000000l), @@ -152,7 +156,8 @@ object VexRiscvLitexSmpClusterOpenSbi extends App{ ioRange = address => address(31 downto 28) === 0xF, resetVector = 0x80000000l ) - } + }, + withExclusiveAndInvalidation = true ), liteDram = LiteDramNativeParameter(addressWidth = 32, dataWidth = 128), liteDramMapping = SizeMapping(0x80000000l, 0x70000000l), diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexMpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexMpCluster.scala index 14daae2..e662dfe 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexMpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexMpCluster.scala @@ -7,64 +7,64 @@ import spinal.lib.bus.wishbone.{WishboneConfig, WishboneToBmbGenerator} import spinal.lib.sim.SparseMemory import vexriscv.demo.smp.VexRiscvSmpClusterGen.vexRiscvConfig -case class VexRiscvLitexSmpMpClusterParameter( cluster : VexRiscvSmpClusterParameter, - liteDram : LiteDramNativeParameter, - liteDramMapping : AddressMapping) - -class VexRiscvLitexSmpMpCluster(p : VexRiscvLitexSmpMpClusterParameter) extends VexRiscvSmpClusterWithPeripherals(p.cluster) { - val iArbiter = BmbBridgeGenerator() - val iBridge = BmbToLiteDramGenerator(p.liteDramMapping) - val dBridge = BmbToLiteDramGenerator(p.liteDramMapping) - - for(core <- cores) interconnect.addConnection(core.cpu.iBus -> List(iArbiter.bmb)) - interconnect.addConnection( - iArbiter.bmb -> List(iBridge.bmb, peripheralBridge.bmb), - invalidationMonitor.output -> List(dBridge.bmb, peripheralBridge.bmb) - ) - interconnect.masters(invalidationMonitor.output).withOutOfOrderDecoder() - - dBridge.liteDramParameter.load(p.liteDram) - iBridge.liteDramParameter.load(p.liteDram) - - // Interconnect pipelining (FMax) - for(core <- cores) { - interconnect.setPipelining(core.cpu.dBus)(cmdValid = true, cmdReady = true, rspValid = true) - interconnect.setPipelining(core.cpu.iBus)(cmdHalfRate = true, rspValid = true) - interconnect.setPipelining(iArbiter.bmb)(cmdHalfRate = true, rspValid = true) - } - interconnect.setPipelining(invalidationMonitor.output)(cmdValid = true, cmdReady = true, rspValid = true) - interconnect.setPipelining(peripheralBridge.bmb)(cmdHalfRate = true, rspValid = true) -} - - -object VexRiscvLitexSmpMpClusterGen extends App { - for(cpuCount <- List(1,2,4,8)) { - def parameter = VexRiscvLitexSmpMpClusterParameter( - cluster = VexRiscvSmpClusterParameter( - cpuConfigs = List.tabulate(cpuCount) { hartId => - vexRiscvConfig( - hartId = hartId, - ioRange = address => address.msb, - resetVector = 0 - ) - } - ), - liteDram = LiteDramNativeParameter(addressWidth = 32, dataWidth = 128), - liteDramMapping = SizeMapping(0x40000000l, 0x40000000l) - ) - - def dutGen = { - val toplevel = new VexRiscvLitexSmpMpCluster( - p = parameter - ).toComponent() - toplevel - } - - val genConfig = SpinalConfig().addStandardMemBlackboxing(blackboxByteEnables) - // genConfig.generateVerilog(Bench.compressIo(dutGen)) - genConfig.generateVerilog(dutGen.setDefinitionName(s"VexRiscvLitexSmpMpCluster_${cpuCount}c")) - } -} +//case class VexRiscvLitexSmpMpClusterParameter( cluster : VexRiscvSmpClusterParameter, +// liteDram : LiteDramNativeParameter, +// liteDramMapping : AddressMapping) +// +//class VexRiscvLitexSmpMpCluster(p : VexRiscvLitexSmpMpClusterParameter) extends VexRiscvSmpClusterWithPeripherals(p.cluster) { +// val iArbiter = BmbBridgeGenerator() +// val iBridge = BmbToLiteDramGenerator(p.liteDramMapping) +// val dBridge = BmbToLiteDramGenerator(p.liteDramMapping) +// +// for(core <- cores) interconnect.addConnection(core.cpu.iBus -> List(iArbiter.bmb)) +// interconnect.addConnection( +// iArbiter.bmb -> List(iBridge.bmb, peripheralBridge.bmb), +// invalidationMonitor.output -> List(dBridge.bmb, peripheralBridge.bmb) +// ) +// interconnect.masters(invalidationMonitor.output).withOutOfOrderDecoder() +// +// dBridge.liteDramParameter.load(p.liteDram) +// iBridge.liteDramParameter.load(p.liteDram) +// +// // Interconnect pipelining (FMax) +// for(core <- cores) { +// interconnect.setPipelining(core.cpu.dBus)(cmdValid = true, cmdReady = true, rspValid = true) +// interconnect.setPipelining(core.cpu.iBus)(cmdHalfRate = true, rspValid = true) +// interconnect.setPipelining(iArbiter.bmb)(cmdHalfRate = true, rspValid = true) +// } +// interconnect.setPipelining(invalidationMonitor.output)(cmdValid = true, cmdReady = true, rspValid = true) +// interconnect.setPipelining(peripheralBridge.bmb)(cmdHalfRate = true, rspValid = true) +//} +// +// +//object VexRiscvLitexSmpMpClusterGen extends App { +// for(cpuCount <- List(1,2,4,8)) { +// def parameter = VexRiscvLitexSmpMpClusterParameter( +// cluster = VexRiscvSmpClusterParameter( +// cpuConfigs = List.tabulate(cpuCount) { hartId => +// vexRiscvConfig( +// hartId = hartId, +// ioRange = address => address.msb, +// resetVector = 0 +// ) +// } +// ), +// liteDram = LiteDramNativeParameter(addressWidth = 32, dataWidth = 128), +// liteDramMapping = SizeMapping(0x40000000l, 0x40000000l) +// ) +// +// def dutGen = { +// val toplevel = new VexRiscvLitexSmpMpCluster( +// p = parameter +// ).toComponent() +// toplevel +// } +// +// val genConfig = SpinalConfig().addStandardMemBlackboxing(blackboxByteEnables) +// // genConfig.generateVerilog(Bench.compressIo(dutGen)) +// genConfig.generateVerilog(dutGen.setDefinitionName(s"VexRiscvLitexSmpMpCluster_${cpuCount}c")) +// } +//} diff --git a/src/main/scala/vexriscv/ip/DataCache.scala b/src/main/scala/vexriscv/ip/DataCache.scala index f712606..7922db8 100644 --- a/src/main/scala/vexriscv/ip/DataCache.scala +++ b/src/main/scala/vexriscv/ip/DataCache.scala @@ -417,7 +417,8 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave val timer = Reg(UInt(log2Up(timeoutCycles)+1 bits)) init(0) val timerFull = timer.msb val hit = cmd.address(tagRange) === buffer.address(tagRange) - val canAggregate = cmd.valid && cmd.wr && !cmd.uncached && !cmd.exclusive && !timerFull && !aggregationCounterFull && (!buffer.stream.valid || aggregationEnabled && hit) + val cmdExclusive = if(p.withExclusive) cmd.exclusive else False + val canAggregate = cmd.valid && cmd.wr && !cmd.uncached && !cmdExclusive && !timerFull && !aggregationCounterFull && (!buffer.stream.valid || aggregationEnabled && hit) val doFlush = cmd.valid && !canAggregate || timerFull || aggregationCounterFull || !aggregationEnabled // val canAggregate = False // val doFlush = True @@ -468,7 +469,7 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave buffer.length := (cmd.length << 2) | 3 if (p.withExclusive) buffer.exclusive := cmd.exclusive - when(cmd.wr && !cmd.uncached && !cmd.exclusive){ + when(cmd.wr && !cmd.uncached && !cmdExclusive){ aggregationEnabled := True buffer.address(aggregationRange.high downto 0) := 0 buffer.length := p.memDataBytes-1