/**
 * Packs a string of single-letter extension names into a bit mask.
 *
 * Letter 'a' selects bit 0, 'b' bit 1, ... 'z' bit 25; matching is case-insensitive and repeated
 * letters are harmless. Elsewhere in this change the result is passed as `misaExtensionsInit`,
 * e.g. `misaToInt("imas") == 0x41101`.
 *
 * @param values extension letters such as "imas"; may be empty
 * @return the OR of `1 << (letter - 'a')` over every letter, 0 for an empty string
 * @throws IllegalArgumentException if `values` contains a character outside A-Z / a-z
 */
def misaToInt(values : String) : Int = {
  // Locale.ROOT keeps the mapping stable on JVMs whose default locale lower-cases 'I' to a non-ASCII letter.
  val letters = values.toLowerCase(java.util.Locale.ROOT)
  // An Int shift only uses the low 5 bits of its count, so a non-letter would silently set an unrelated bit.
  require(letters.forall(c => c >= 'a' && c <= 'z'), s"misa extensions must be letters A-Z, got '$values'")
  // foldLeft instead of reduce: an empty extension list yields 0 rather than throwing.
  letters.foldLeft(0)((mask, c) => mask | (1 << (c - 'a')))
}
+ def UCYCLEH = 0xC80 + def UTIME = 0xC01 // rdtime + def UTIMEH = 0xC81 + } } diff --git a/src/main/scala/vexriscv/Services.scala b/src/main/scala/vexriscv/Services.scala index 4b0aeca..51dbe6b 100644 --- a/src/main/scala/vexriscv/Services.scala +++ b/src/main/scala/vexriscv/Services.scala @@ -65,20 +65,28 @@ trait RegFileService{ case class MemoryTranslatorCmd() extends Bundle{ val isValid = Bool + val isStuck = Bool val virtualAddress = UInt(32 bits) val bypassTranslation = Bool } -case class MemoryTranslatorRsp() extends Bundle{ +case class MemoryTranslatorRsp(p : MemoryTranslatorBusParameter) extends Bundle{ val physicalAddress = UInt(32 bits) val isIoAccess = Bool val allowRead, allowWrite, allowExecute = Bool val exception = Bool val refilling = Bool + val bypassTranslation = Bool + val ways = Vec(MemoryTranslatorRspWay(), p.wayCount) +} +case class MemoryTranslatorRspWay() extends Bundle{ + val sel = Bool() + val physical = UInt(32 bits) } -case class MemoryTranslatorBus() extends Bundle with IMasterSlave{ - val cmd = MemoryTranslatorCmd() - val rsp = MemoryTranslatorRsp() +case class MemoryTranslatorBusParameter(wayCount : Int = 0, latency : Int = 0) +case class MemoryTranslatorBus(p : MemoryTranslatorBusParameter) extends Bundle with IMasterSlave{ + val cmd = Vec(MemoryTranslatorCmd(), p.latency + 1) + val rsp = MemoryTranslatorRsp(p) val end = Bool val busy = Bool diff --git a/src/main/scala/vexriscv/TestsWorkspace.scala b/src/main/scala/vexriscv/TestsWorkspace.scala index 8db4316..b522aed 100644 --- a/src/main/scala/vexriscv/TestsWorkspace.scala +++ b/src/main/scala/vexriscv/TestsWorkspace.scala @@ -26,76 +26,106 @@ import vexriscv.ip._ import spinal.lib.bus.avalon.AvalonMM import spinal.lib.eda.altera.{InterruptReceiverTag, ResetEmitterTag} + +// make clean all SEED=42 MMU=no STOP_ON_ERROR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes SUPERVISOR=yes REDO=1 DHRYSTONE=yes LRSC=yes AMO=yes LINUX_REGRESSION=yes TRACE=yes TRACE_START=1000000000 FLOW_INFO=ye 
IBUS_DATA_WIDTH=128 DBUS_DATA_WIDTH=128 +//make clean all SEED=42 MMU=no STOP_ON_ERROR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes SUPERVISOR=yes REDO=1 DHRYSTONE=yes LRSC=yes AMO=yes TRACE=yes TRACE_START=1000000000 FLOW_INFO=ye IBUS_DATA_WIDTH=128 DBUS_DATA_WIDTH=128 LINUX_SOC_SMP=yes VMLINUX=../../../../../buildroot/output/images/Image RAMDISK=../../../../../buildroot/output/images/rootfs.cpio DTB=../../../../../buildroot/output/images/dtb EMULATOR=../../../../../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_jump.bin object TestsWorkspace { def main(args: Array[String]) { def configFull = { val config = VexRiscvConfig( plugins = List( - // new IBusSimplePlugin( - // resetVector = 0x80000000l, - // cmdForkOnSecondStage = false, - // cmdForkPersistence = false, - // prediction = NONE, - // historyRamSizeLog2 = 10, - // catchAccessFault = false, - // compressedGen = false, - // busLatencyMin = 1, - // injectorStage = true - // ), + new MmuPlugin( + ioRange = x => x(31 downto 28) === 0xF + ), + //Uncomment the whole IBusSimplePlugin and comment IBusCachedPlugin if you want uncached iBus config + // new IBusSimplePlugin( + // resetVector = 0x80000000l, + // cmdForkOnSecondStage = false, + // cmdForkPersistence = false, + // prediction = DYNAMIC_TARGET, + // historyRamSizeLog2 = 10, + // catchAccessFault = true, + // compressedGen = true, + // busLatencyMin = 1, + // injectorStage = true, + // memoryTranslatorPortConfig = withMmu generate MmuPortConfig( + // portTlbSize = 4 + // ) + // ), + + //Uncomment the whole IBusCachedPlugin and comment IBusSimplePlugin if you want cached iBus config new IBusCachedPlugin( resetVector = 0x80000000l, compressedGen = false, - prediction = NONE, - injectorStage = true, + prediction = STATIC, + injectorStage = false, config = InstructionCacheConfig( - cacheSize = 4096, - bytePerLine = 32, - wayCount = 1, + cacheSize = 4096*2, + bytePerLine = 64, + wayCount = 2, addressWidth = 32, cpuDataWidth = 32, - memDataWidth = 32, + 
memDataWidth = 128, catchIllegalAccess = true, catchAccessFault = true, asyncTagMemory = false, - twoCycleRam = false, - twoCycleCache = true + twoCycleRam = true, + twoCycleCache = true, + reducedBankWidth = true + // ) ), - memoryTranslatorPortConfig = MemoryTranslatorPortConfig( - portTlbSize = 4 + memoryTranslatorPortConfig = MmuPortConfig( + portTlbSize = 4, + latency = 1, + earlyRequireMmuLockup = true, + earlyCacheHits = true ) ), -// ).newTightlyCoupledPort(TightlyCoupledPortParameter("iBusTc", a => a(30 downto 28) === 0x0 && a(5))), - // new DBusSimplePlugin( - // catchAddressMisaligned = true, - // catchAccessFault = false, - // earlyInjection = false - // ), + // ).newTightlyCoupledPort(TightlyCoupledPortParameter("iBusTc", a => a(30 downto 28) === 0x0 && a(5))), + // new DBusSimplePlugin( + // catchAddressMisaligned = true, + // catchAccessFault = true, + // earlyInjection = false, + // withLrSc = true, + // memoryTranslatorPortConfig = withMmu generate MmuPortConfig( + // portTlbSize = 4 + // ) + // ), new DBusCachedPlugin( + dBusCmdMasterPipe = true, + dBusCmdSlavePipe = true, + dBusRspSlavePipe = true, config = new DataCacheConfig( - cacheSize = 4096, - bytePerLine = 32, + cacheSize = 4096*1, + bytePerLine = 64, wayCount = 1, addressWidth = 32, cpuDataWidth = 32, - memDataWidth = 32, + memDataWidth = 128, catchAccessError = true, catchIllegal = true, catchUnaligned = true, - withLrSc = true + withLrSc = true, + withAmo = true, + withExclusive = true, + withInvalidate = true, + pendingMax = 32 + // ) ), - // memoryTranslatorPortConfig = null - memoryTranslatorPortConfig = MemoryTranslatorPortConfig( - portTlbSize = 6 + memoryTranslatorPortConfig = MmuPortConfig( + portTlbSize = 4, + latency = 1, + earlyRequireMmuLockup = true, + earlyCacheHits = true ) ), - // new StaticMemoryTranslatorPlugin( + + // new MemoryTranslatorPlugin( + // tlbSize = 32, + // virtualRange = _(31 downto 28) === 0xC, // ioRange = _(31 downto 28) === 0xF // ), - new 
MemoryTranslatorPlugin( - tlbSize = 32, - virtualRange = _(31 downto 28) === 0xC, - ioRange = _(31 downto 28) === 0xF - ), + new DecoderSimplePlugin( catchIllegalInstruction = true ), @@ -107,7 +137,7 @@ object TestsWorkspace { new SrcPlugin( separatedAddSub = false ), - new FullBarrelShifterPlugin(earlyInjection = true), + new FullBarrelShifterPlugin(earlyInjection = false), // new LightShifterPlugin, new HazardSimplePlugin( bypassExecute = true, @@ -128,7 +158,7 @@ object TestsWorkspace { divUnrollFactor = 1 ), // new DivPlugin, - new CsrPlugin(CsrPluginConfig.all(0x80000020l)), + new CsrPlugin(CsrPluginConfig.all2(0x80000020l).copy(ebreakGen = false, misaExtensionsInit = Riscv.misaToInt("imas"))), // new CsrPlugin(//CsrPluginConfig.all2(0x80000020l).copy(ebreakGen = true)/* // CsrPluginConfig( // catchIllegalAccess = false, @@ -154,9 +184,9 @@ object TestsWorkspace { // )), new DebugPlugin(ClockDomain.current.clone(reset = Bool().setName("debugReset"))), new BranchPlugin( - earlyBranch = true, + earlyBranch = false, catchAddressMisaligned = true, - fenceiGenAsAJump = true + fenceiGenAsAJump = false ), new YamlPlugin("cpu0.yaml") ) @@ -244,10 +274,3 @@ object TestsWorkspace { } } } - -//TODO DivPlugin should not used MixedDivider (double twoComplement) -//TODO DivPlugin should register the twoComplement output before pipeline insertion -//TODO MulPlugin doesn't fit well on Artix (FMAX) -//TODO PcReg design is unoptimized by Artix synthesis -//TODO FMAX SRC mux + bipass mux prioriti -//TODO FMAX, isFiring is to pesimisstinc in some cases(include removeIt flushed ..) 
/**
 * Generator for a VexRiscv configured with `withMemoryStage = false` and
 * `withWriteBackStage = false`.
 *
 * Running the object emits Verilog for the variant with every option switched off.
 */
object GenTwoStage extends App{
  /**
   * Builds one CPU variant.
   *
   * @param withMulDiv     when true, MulSimplePlugin and DivPlugin are appended to the plugin list
   * @param bypass         forwarded to HazardSimplePlugin as bypassExecute and bypassWriteBackBuffer
   * @param barrielShifter when true a FullBarrelShifterPlugin (earlyInjection = true) is used,
   *                       otherwise a LightShifterPlugin
   * @return a new VexRiscv instance
   */
  def cpu(withMulDiv : Boolean,
          bypass : Boolean,
          barrielShifter : Boolean) = {
    // Plugins common to every variant; list order is kept exactly as in the original definition.
    val commonPlugins = List(
      new IBusSimplePlugin(
        resetVector = 0x80000000l,
        cmdForkOnSecondStage = false,
        cmdForkPersistence = false,
        prediction = NONE,
        catchAccessFault = false,
        compressedGen = false,
        injectorStage = false
      ),
      new DBusSimplePlugin(
        catchAddressMisaligned = false,
        catchAccessFault = false
      ),
      new CsrPlugin(CsrPluginConfig.smallest),
      new DecoderSimplePlugin(
        catchIllegalInstruction = false
      ),
      new RegFilePlugin(
        regFileReadyKind = plugin.SYNC,
        readInExecute = true,
        zeroBoot = true,
        x0Init = false
      ),
      new IntAluPlugin,
      new SrcPlugin(
        separatedAddSub = false,
        executeInsertion = true
      ),
      new HazardSimplePlugin(
        bypassExecute = bypass,
        bypassMemory = false,
        bypassWriteBack = false,
        bypassWriteBackBuffer = bypass,
        pessimisticUseSrc = false,
        pessimisticWriteRegFile = false,
        pessimisticAddressMatch = false
      ),
      new BranchPlugin(
        earlyBranch = true,
        catchAddressMisaligned = false
      ),
      new YamlPlugin("cpu0.yaml")
    )

    // Optional multiplier / divider, appended after the common plugins.
    val mulDivPlugins =
      if(withMulDiv) List(new MulSimplePlugin, new DivPlugin)
      else Nil

    // Exactly one shifter implementation, always last in the list.
    val shifterPlugin =
      if(barrielShifter) new FullBarrelShifterPlugin(earlyInjection = true)
      else new LightShifterPlugin

    new VexRiscv(
      config = VexRiscvConfig(
        withMemoryStage = false,
        withWriteBackStage = false,
        plugins = commonPlugins ++ mulDivPlugins ++ List(shifterPlugin)
      )
    )
  }

  SpinalVerilog(cpu(withMulDiv = false, bypass = false, barrielShifter = false))
}
spinal.lib.io.InOutWrapper -import vexriscv.VexRiscv -import vexriscv.plugin.DecoderSimplePlugin +import vexriscv.plugin.CsrAccess.{READ_ONLY, READ_WRITE, WRITE_ONLY} +import vexriscv.{VexRiscv, VexRiscvConfig, plugin} +import vexriscv.plugin.{BranchPlugin, CsrPlugin, CsrPluginConfig, DBusSimplePlugin, DecoderSimplePlugin, FullBarrelShifterPlugin, HazardSimplePlugin, IBusSimplePlugin, IntAluPlugin, LightShifterPlugin, NONE, RegFilePlugin, SrcPlugin, YamlPlugin} import scala.collection.mutable.ArrayBuffer import scala.util.Random @@ -49,6 +51,42 @@ object VexRiscvSynthesisBench { // top // } + val twoStage = new Rtl { + override def getName(): String = "VexRiscv two stages" + override def getRtlPath(): String = "VexRiscvTwoStages.v" + SpinalVerilog(wrap(GenTwoStage.cpu( + withMulDiv = false, + bypass = false, + barrielShifter = false + )).setDefinitionName(getRtlPath().split("\\.").head)) + } + val twoStageBarell = new Rtl { + override def getName(): String = "VexRiscv two stages with barriel" + override def getRtlPath(): String = "VexRiscvTwoStagesBar.v" + SpinalVerilog(wrap(GenTwoStage.cpu( + withMulDiv = false, + bypass = true, + barrielShifter = true + )).setDefinitionName(getRtlPath().split("\\.").head)) + } + val twoStageMulDiv = new Rtl { + override def getName(): String = "VexRiscv two stages with Mul Div" + override def getRtlPath(): String = "VexRiscvTwoStagesMD.v" + SpinalVerilog(wrap(GenTwoStage.cpu( + withMulDiv = true, + bypass = false, + barrielShifter = false + )).setDefinitionName(getRtlPath().split("\\.").head)) + } + val twoStageAll = new Rtl { + override def getName(): String = "VexRiscv two stages with Mul Div fast" + override def getRtlPath(): String = "VexRiscvTwoStagesMDfast.v" + SpinalVerilog(wrap(GenTwoStage.cpu( + withMulDiv = true, + bypass = true, + barrielShifter = true + )).setDefinitionName(getRtlPath().split("\\.").head)) + } val smallestNoCsr = new Rtl { override def getName(): String = "VexRiscv smallest no CSR" override def 
getRtlPath(): String = "VexRiscvSmallestNoCsr.v" @@ -109,13 +147,71 @@ object VexRiscvSynthesisBench { SpinalConfig(inlineRom = true).generateVerilog(wrap(new VexRiscv(LinuxGen.configFull(false, true))).setDefinitionName(getRtlPath().split("\\.").head)) } - val rtls = List(smallestNoCsr, smallest, smallAndProductive, smallAndProductiveWithICache, fullNoMmuNoCache, noCacheNoMmuMaxPerf, fullNoMmuMaxPerf, fullNoMmu, full, linuxBalanced) -// val rtls = List(smallestNoCsr, smallest, smallAndProductive, smallAndProductiveWithICache) - // val rtls = List(smallAndProductive, smallAndProductiveWithICache, fullNoMmuMaxPerf, fullNoMmu, full) -// val rtls = List(smallAndProductive) + val linuxBalancedSmp = new Rtl { + override def getName(): String = "VexRiscv linux balanced SMP" + override def getRtlPath(): String = "VexRiscvLinuxBalancedSmp.v" + SpinalConfig(inlineRom = true).generateVerilog(wrap(new VexRiscv(LinuxGen.configFull(false, true, withSmp = true))).setDefinitionName(getRtlPath().split("\\.").head)) + } - val targets = XilinxStdTargets() ++ AlteraStdTargets() ++ IcestormStdTargets().take(1) + +// val rtls = List(twoStage, twoStageBarell, twoStageMulDiv, twoStageAll, smallestNoCsr, smallest, smallAndProductive, smallAndProductiveWithICache, fullNoMmuNoCache, noCacheNoMmuMaxPerf, fullNoMmuMaxPerf, fullNoMmu, full, linuxBalanced, linuxBalancedSmp) + val rtls = List(linuxBalanced, linuxBalancedSmp) +// val rtls = List(smallest) + val targets = XilinxStdTargets() ++ AlteraStdTargets() ++ IcestormStdTargets().take(1) ++ List( + new Target { + override def getFamilyName(): String = "Kintex UltraScale" + override def synthesise(rtl: Rtl, workspace: String): Report = { + VivadoFlow( + frequencyTarget = 50 MHz, + vivadoPath=sys.env.getOrElse("VIVADO_ARTIX_7_BIN", null), + workspacePath=workspace + "_area", + toplevelPath=rtl.getRtlPath(), + family=getFamilyName(), + device="xcku035-fbva900-3-e" + ) + } + }, + new Target { + override def getFamilyName(): String = "Kintex 
/**
 * Synthesis benchmark that keeps the CPU fixed and varies only the CsrPlugin configuration
 * (mtvec access mode, mcycle/minstret access mode).
 */
object VexRiscvCustomSynthesisBench {
  /** Generates one Verilog file per CSR variant and hands them to `Bench`. */
  def main(args: Array[String]): Unit = {

    // CPU shared by every variant; only the CsrPlugin passed in differs.
    def gen(csr : CsrPlugin) = new VexRiscv(
      config = VexRiscvConfig(
        plugins = List(
          new IBusSimplePlugin(
            resetVector = 0x80000000l,
            cmdForkOnSecondStage = false,
            cmdForkPersistence = false,
            prediction = NONE,
            catchAccessFault = false,
            compressedGen = false
          ),
          new DBusSimplePlugin(
            catchAddressMisaligned = false,
            catchAccessFault = false
          ),
          new DecoderSimplePlugin(
            catchIllegalInstruction = false
          ),
          new RegFilePlugin(
            regFileReadyKind = plugin.SYNC,
            zeroBoot = false
          ),
          new IntAluPlugin,
          new SrcPlugin(
            separatedAddSub = false,
            executeInsertion = true
          ),
          csr,
          new FullBarrelShifterPlugin(),
          new HazardSimplePlugin(
            bypassExecute = true,
            bypassMemory = true,
            bypassWriteBack = true,
            bypassWriteBackBuffer = true,
            pessimisticUseSrc = false,
            pessimisticWriteRegFile = false,
            pessimisticAddressMatch = false
          ),
          new BranchPlugin(
            earlyBranch = false,
            catchAddressMisaligned = false
          ),
          new YamlPlugin("cpu0.yaml")
        )
      )
    )

    // One benchmark entry. The Verilog is generated while the Rtl object is constructed, as before.
    // csrConfig is by-name so the configuration is still created inside the SpinalVerilog call.
    def variant(name : String, rtlPath : String)(csrConfig : => CsrPluginConfig) = new Rtl {
      override def getName(): String = name
      override def getRtlPath(): String = rtlPath
      SpinalVerilog(gen(new CsrPlugin(csrConfig)).setDefinitionName(getRtlPath().split("\\.").head))
    }

    // List arguments are evaluated left to right, so generation order matches the original vals.
    val rtls = List(
      variant("Fixed MTVEC", "fixedMtvec.v")(
        CsrPluginConfig.smallest(0x80000000l)),
      variant("write only MTVEC", "woMtvec.v")(
        CsrPluginConfig.smallest(null).copy(mtvecAccess = WRITE_ONLY)),
      variant("read write MTVEC", "wrMtvec.v")(
        CsrPluginConfig.smallest(null).copy(mtvecAccess = READ_WRITE)),
      variant("Fixed MTVEC, read only mcycle/minstret", "fixedMtvecRoCounter.v")(
        CsrPluginConfig.smallest(0x80000000l).copy(mcycleAccess = READ_ONLY, minstretAccess = READ_ONLY)),
      variant("read write MTVEC, read only mcycle/minstret", "readWriteMtvecRoCounter.v")(
        CsrPluginConfig.smallest(null).copy(mtvecAccess = READ_WRITE, mcycleAccess = READ_ONLY, minstretAccess = READ_ONLY))
    )

    val targets = XilinxStdTargets() ++ AlteraStdTargets() ++ IcestormStdTargets().take(1)

    Bench(rtls, targets)
  }
}
/**
 * Bundle of three streams: `cmd` (address + write flag), `wdata` (data + byte enables)
 * and `rdata` (read data).
 *
 * @param p address and data widths shared by the three streams
 */
case class LiteDramNative(p : LiteDramNativeParameter) extends Bundle with IMasterSlave {
  val cmd = Stream(LiteDramNativeCmd(p))
  val wdata = Stream(LiteDramNativeWData(p))
  val rdata = Stream(LiteDramNativeRData(p))

  /** Master view: drives `cmd` and `wdata`, receives `rdata`. */
  override def asMaster(): Unit = {
    master(cmd, wdata)
    slave(rdata)
  }

  /**
   * Instantiates a BmbToLiteDram bridge between `bmb` and this port.
   *
   * @param bmb           bus connected to the bridge input
   * @param wdataFifoSize forwarded to the bridge
   * @param rdataFifoSize forwarded to the bridge
   * @return the bridge component
   */
  def fromBmb(bmb : Bmb, wdataFifoSize : Int, rdataFifoSize : Int) = {
    val bridge = BmbToLiteDram(
      bmbParameter = bmb.p,
      liteDramParameter = this.p,
      wdataFifoSize = wdataFifoSize,
      rdataFifoSize = rdataFifoSize
    )
    bridge.io.input << bmb
    bridge.io.output <> this
    bridge
  }

  /**
   * Simulation-only memory model attached to this port.
   *
   * Commands and write data are captured into queues; on each sampling of `cd`, with
   * probability 0.5, the oldest command is executed against `ram`. Reads push their result
   * into a queue that drives `rdata`.
   *
   * @param ram backing memory, accessed byte by byte
   * @param cd  clock domain on which the streams are sampled and driven
   * @param bmb optional upstream bus; when non-null, every command with opcode 1 seen on it is
   *            recorded and checked against the writes reaching this port
   */
  def simSlave(ram : SparseMemory,cd : ClockDomain, bmb : Bmb = null): Unit ={
    import spinal.core.sim._
    def bus = this
    case class Cmd(address : Long, we : Boolean)
    case class WData(data : BigInt, we : Long)
    val cmdQueue = mutable.Queue[Cmd]()
    val wdataQueue = mutable.Queue[WData]()
    val rdataQueue = mutable.Queue[BigInt]()

    val bytesPerBeat = p.dataWidth/8
    // Aligns a byte address to one beat of this port. The original hard-coded 0xFFFFFFF0,
    // which is only right for a 128-bit port. Assumes bytesPerBeat is a power of two.
    val beatAlignMask = ~((bytesPerBeat - 1).toLong)

    case class Ref(address : Long, data : BigInt, we : Long, time : Long)
    val ref = mutable.Queue[Ref]()
    // NOTE(review): opcode 1 is presumably the Bmb write opcode - confirm against the Bmb definition.
    if(bmb != null) StreamMonitor(bmb.cmd, cd){ bmbCmd =>
      if(bmb.cmd.opcode.toInt == 1) ref.enqueue(Ref(bmbCmd.fragment.address.toLong, bmbCmd.fragment.data.toBigInt, bmbCmd.fragment.mask.toLong, simTime()))
    }

    var writeCmdCounter, writeDataCounter = 0
    StreamReadyRandomizer(bus.cmd, cd).factor = 0.5f
    StreamMonitor(bus.cmd, cd) { t =>
      // The port address counts beats; convert it to a byte address.
      cmdQueue.enqueue(Cmd(t.addr.toLong * bytesPerBeat , t.we.toBoolean))
      if(t.we.toBoolean) writeCmdCounter += 1
    }

    StreamReadyRandomizer(bus.wdata, cd).factor = 0.5f
    StreamMonitor(bus.wdata, cd) { w =>
      writeDataCounter += 1
      wdataQueue.enqueue(WData(w.data.toBigInt, w.we.toLong))
    }

    cd.onSamplings{
      // Trace printed whenever write data runs more than two beats ahead of write commands.
      if(writeDataCounter-writeCmdCounter > 2){
        println("miaou")
      }
      // Evaluation order (queue check, then the random draw) is kept so the random sequence is unchanged.
      if(cmdQueue.nonEmpty && Random.nextFloat() < 0.5){
        val cmd = cmdQueue.head
        if(cmd.we){
          // A write stays at the head of the queue until its data beat has arrived.
          if(wdataQueue.nonEmpty){
            cmdQueue.dequeue()
            val wdata = wdataQueue.dequeue()
            // Big-endian two's-complement bytes; may be shorter than a beat or carry a leading sign byte.
            val raw = wdata.data.toByteArray
            val left = raw.size-1
            if(bmb != null){
              assert(ref.nonEmpty)
              assert((ref.head.address & beatAlignMask) == cmd.address)
              assert(ref.head.data == wdata.data)
              assert(ref.head.we == wdata.we)
              ref.dequeue()
            }
            for(i <- 0 until bytesPerBeat){
              if(((wdata.we >> i) & 1) != 0) {
                // Byte i is the i-th least significant byte; bytes missing from raw are zero.
                ram.write(cmd.address + i, if (left - i >= 0) raw(left - i) else 0)
              }
            }
          }
        } else {
          cmdQueue.dequeue()
          // One extra leading zero byte keeps the BigInt non-negative.
          val value = new Array[Byte](bytesPerBeat+1)
          val left = value.size-1
          for(i <- 0 until bytesPerBeat) {
            value(left-i) = ram.read(cmd.address+i)
          }
          rdataQueue.enqueue(BigInt(value))
        }
      }
    }

    StreamDriver(bus.rdata, cd){ r =>
      if(rdataQueue.isEmpty){
        false
      } else {
        r.data #= rdataQueue.dequeue()
        true
      }
    }
  }
}
Reg(UInt(log2Up(rdataFifoSize) + 1 bits)) init(0) + + val halt = Bool() + val (cmdFork, dataFork) = StreamFork2(unburstified.cmd.haltWhen(halt)) + val outputCmd = Stream(LiteDramNativeCmd(liteDramParameter)) + outputCmd.arbitrationFrom(cmdFork.haltWhen(pendingRead.msb)) + outputCmd.addr := (cmdFork.address >> log2Up(liteDramParameter.dataWidth/8)).resized + outputCmd.we := cmdFork.isWrite + + io.output.cmd <-< outputCmd + + if(bmbParameter.canWrite) { + val wData = Stream(LiteDramNativeWData(liteDramParameter)) + wData.arbitrationFrom(dataFork.throwWhen(dataFork.isRead)) + wData.data := dataFork.data + wData.we := dataFork.mask + io.output.wdata << wData.queueLowLatency(wdataFifoSize, latency = 1) + } else { + dataFork.ready := True + io.output.wdata.valid := False + io.output.wdata.data.assignDontCare() + io.output.wdata.we.assignDontCare() + } + + val cmdContext = Stream(Context()) + cmdContext.valid := unburstified.cmd.fire + cmdContext.context := unburstified.cmd.context + cmdContext.source := unburstified.cmd.source + cmdContext.isWrite := unburstified.cmd.isWrite + halt := !cmdContext.ready + + val rspContext = cmdContext.queue(rdataFifoSize) + val rdataFifo = io.output.rdata.queueLowLatency(rdataFifoSize, latency = 1) + val writeTocken = CounterUpDown( + stateCount = rdataFifoSize*2, + incWhen = io.output.wdata.fire, + decWhen = rspContext.fire && rspContext.isWrite + ) + val canRspWrite = writeTocken =/= 0 + val canRspRead = CombInit(rdataFifo.valid) + + rdataFifo.ready := unburstified.rsp.fire && !rspContext.isWrite + rspContext.ready := unburstified.rsp.fire + unburstified.rsp.valid := rspContext.valid && (rspContext.isWrite ? 
canRspWrite | canRspRead) + unburstified.rsp.setSuccess() + unburstified.rsp.last := True + unburstified.rsp.source := rspContext.source + unburstified.rsp.context := rspContext.context + unburstified.rsp.data := rdataFifo.data + + + pendingRead := pendingRead + U(outputCmd.fire && !outputCmd.we) - U(rdataFifo.fire) +} + +object BmbToLiteDramTester extends App{ + import spinal.core.sim._ + SimConfig.withWave.compile(BmbToLiteDram( + bmbParameter = BmbParameter( + addressWidth = 20, + dataWidth = 32, + lengthWidth = 6, + sourceWidth = 4, + contextWidth = 16 + ), + liteDramParameter = LiteDramNativeParameter( + addressWidth = 20, + dataWidth = 128 + ), + wdataFifoSize = 16, + rdataFifoSize = 16 + )).doSimUntilVoid(seed = 42){dut => + val tester = new BmbMemoryTester(dut.io.input, dut.clockDomain, rspCounterTarget = 3000) + dut.io.output.simSlave(tester.memory.memory, dut.clockDomain) + } +} \ No newline at end of file diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala new file mode 100644 index 0000000..efd4010 --- /dev/null +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -0,0 +1,622 @@ +package vexriscv.demo.smp + +import spinal.core +import spinal.core._ +import spinal.core.sim.{onSimEnd, simSuccess} +import spinal.lib._ +import spinal.lib.bus.bmb.sim.BmbMemoryAgent +import spinal.lib.bus.bmb.{Bmb, BmbArbiter, BmbDecoder, BmbExclusiveMonitor, BmbInvalidateMonitor, BmbParameter} +import spinal.lib.bus.misc.SizeMapping +import spinal.lib.com.jtag.{Jtag, JtagTapInstructionCtrl} +import spinal.lib.com.jtag.sim.JtagTcp +import spinal.lib.system.debugger.SystemDebuggerConfig +import vexriscv.ip.{DataCacheAck, DataCacheConfig, DataCacheMemBus, InstructionCache, InstructionCacheConfig} +import vexriscv.plugin.{BranchPlugin, CsrAccess, CsrPlugin, CsrPluginConfig, DBusCachedPlugin, DBusSimplePlugin, DYNAMIC_TARGET, DebugPlugin, DecoderSimplePlugin, FullBarrelShifterPlugin, 
/**
 * Static description of a cluster.
 *
 * @param cpuConfigs one configuration per CPU; the CPU built from entry `i` is wired to bit `i`
 *                   of every per-CPU interrupt input of [[VexRiscvSmpCluster]].
 */
case class VexRiscvSmpClusterParameter(cpuConfigs : Seq[VexRiscvConfig])

/**
 * Several VexRiscv cores sharing one data memory port.
 *
 * Data side: every core's data bus goes through a BmbArbiter, then a BmbExclusiveMonitor, then a
 * BmbInvalidateMonitor, and leaves through `io.dMem`.
 * Instruction side: each core keeps its own port in `io.iMems`.
 * Debug: `io.debugBus` is decoded into one 0x1000-byte window per core.
 *
 * NOTE: elaboration appends a DebugPlugin to every entry of `p.cpuConfigs`, i.e. it mutates the
 * configs it was given; build fresh configs for every instantiation.
 *
 * @param p                cluster description; its first CPU config must contain a
 *                         DBusCachedPlugin and an IBusCachedPlugin, because all bus parameters
 *                         are derived from that first config only
 * @param debugClockDomain clock domain used for the debug plugins and the debug bus decoder
 * @throws IllegalArgumentException if `p.cpuConfigs` is empty or its first entry lacks one of the
 *                                  two cached bus plugins
 */
case class VexRiscvSmpCluster(p : VexRiscvSmpClusterParameter,
                              debugClockDomain : ClockDomain) extends Component{
  // Pattern-match lookup instead of find/get/asInstanceOf so that a missing plugin is reported
  // with a message naming the actual problem.
  val dBusParameter = p.cpuConfigs.headOption
    .flatMap(_.plugins.collectFirst{ case cached : DBusCachedPlugin => cached.config.getBmbParameter() })
    .getOrElse(throw new IllegalArgumentException("VexRiscvSmpCluster: the first CPU config must contain a DBusCachedPlugin"))
  // The arbiter needs enough source bits to tell the CPUs apart.
  val dBusArbiterParameter = dBusParameter.copy(sourceWidth = log2Up(p.cpuConfigs.size))
  val exclusiveMonitorParameter = dBusArbiterParameter
  val invalidateMonitorParameter = BmbExclusiveMonitor.outputParameter(exclusiveMonitorParameter)
  val dMemParameter = BmbInvalidateMonitor.outputParameter(invalidateMonitorParameter)

  val iBusParameter = p.cpuConfigs.headOption
    .flatMap(_.plugins.collectFirst{ case cached : IBusCachedPlugin => cached.config.getBmbParameter() })
    .getOrElse(throw new IllegalArgumentException("VexRiscvSmpCluster: the first CPU config must contain an IBusCachedPlugin"))
  val iBusArbiterParameter = iBusParameter//.copy(sourceWidth = log2Up(p.cpuConfigs.size))
  val iMemParameter = iBusArbiterParameter

  val io = new Bundle {
    val dMem = master(Bmb(dMemParameter))
//    val iMem = master(Bmb(iMemParameter))
    val iMems = Vec(master(Bmb(iMemParameter)), p.cpuConfigs.size)
    // One bit per CPU, indexed by the CPU's position in p.cpuConfigs.
    val timerInterrupts = in Bits(p.cpuConfigs.size bits)
    val externalInterrupts = in Bits(p.cpuConfigs.size bits)
    val softwareInterrupts = in Bits(p.cpuConfigs.size bits)
    val externalSupervisorInterrupts = in Bits(p.cpuConfigs.size bits)
    val debugBus = slave(Bmb(SystemDebuggerConfig().getBmbParameter.copy(addressWidth = 20)))
    val debugReset = out Bool()
    val time = in UInt(64 bits)
  }

  // Default; each core's debug plugin may set it below (setWhen).
  io.debugReset := False
  val cpus = for((cpuConfig, cpuId) <- p.cpuConfigs.zipWithIndex) yield new Area{
    // Filled in while walking the plugins of the elaborated core.
    var iBus : Bmb = null
    var dBus : Bmb = null
    var debug : Bmb = null
    // Re-home any DebugPlugin already present in the config ...
    cpuConfig.plugins.foreach {
      case plugin: DebugPlugin => debugClockDomain{
        plugin.debugClockDomain = debugClockDomain
      }
      case _ =>
    }
    // ... and add one unconditionally.
    // NOTE(review): a config that already carries a DebugPlugin ends up with two - confirm intended.
    cpuConfig.plugins += new DebugPlugin(debugClockDomain)
    val core = new VexRiscv(cpuConfig)
    core.plugins.foreach {
      case plugin: IBusCachedPlugin => iBus = plugin.iBus.toBmb()
      case plugin: DBusCachedPlugin => dBus = plugin.dBus.toBmb().pipelined(cmdValid = true)
      case plugin: CsrPlugin => {
        plugin.softwareInterrupt := io.softwareInterrupts(cpuId)
        plugin.externalInterrupt := io.externalInterrupts(cpuId)
        plugin.timerInterrupt := io.timerInterrupts(cpuId)
        if (plugin.config.supervisorGen) plugin.externalInterruptS := io.externalSupervisorInterrupts(cpuId)
        if (plugin.utime != null) plugin.utime := io.time
      }
      case plugin: DebugPlugin => debugClockDomain{
        io.debugReset setWhen(RegNext(plugin.io.resetOut))
        debug = plugin.io.bus.fromBmb()
      }
      case _ =>
    }
  }

  val dBusArbiter = BmbArbiter(
    p = dBusArbiterParameter,
    portCount = cpus.size,
    lowerFirstPriority = false,
    inputsWithInv = cpus.map(_ => true),
    inputsWithSync = cpus.map(_ => true),
    pendingInvMax = 16
  )

  (dBusArbiter.io.inputs, cpus).zipped.foreach(_ << _.dBus.pipelined(invValid = true, ackValid = true, syncValid = true))

  val exclusiveMonitor = BmbExclusiveMonitor(
    inputParameter = exclusiveMonitorParameter,
    pendingWriteMax = 64
  )
  exclusiveMonitor.io.input << dBusArbiter.io.output.pipelined(cmdValid = true, cmdReady = true, rspValid = true)

  val invalidateMonitor = BmbInvalidateMonitor(
    inputParameter = invalidateMonitorParameter,
    pendingInvMax = 16
  )
  invalidateMonitor.io.input << exclusiveMonitor.io.output

  io.dMem << invalidateMonitor.io.output

  // Instruction ports are not arbitrated here: one external port per core.
  (io.iMems, cpus).zipped.foreach(_ << _.iBus)

  val debug = debugClockDomain on new Area{
    // Core i answers debug-bus addresses [i*0x1000, i*0x1000 + 0x1000).
    val arbiter = BmbDecoder(
      p = io.debugBus.p,
      mappings = List.tabulate(p.cpuConfigs.size)(i => SizeMapping(0x00000 + i*0x1000, 0x1000)),
      capabilities = List.fill(p.cpuConfigs.size)(io.debugBus.p),
      pendingMax = 2
    )
    arbiter.io.input << io.debugBus
    (arbiter.io.outputs, cpus.map(_.debug)).zipped.foreach(_ >> _)
  }
}
/**
 * Simulation-side support shared by the SMP cluster test benches: a memory agent that backs the
 * cluster's memory ports and intercepts every access whose address has its top nibble equal to
 * 0xF (test reports, console, CLINT model), plus the common clock/reset start-up.
 */
object VexRiscvSmpClusterTestInfrastructure{
  val REPORT_OFFSET = 0xF8000000
  // Report codes (low 16 bits of the address inside the report window).
  val REPORT_THREAD_ID = 0x00
  val REPORT_THREAD_COUNT = 0x04
  val REPORT_END = 0x08
  val REPORT_BARRIER_START = 0x0C
  val REPORT_BARRIER_END = 0x10
  val REPORT_CONSISTENCY_VALUES = 0x14

  // Offsets inside the 0xF0000000 window.
  val PUTC = 0x00
  val GETC = 0x04
  val CLINT_ADDR = 0x10000
  val CLINT_IPI_ADDR = CLINT_ADDR+0x0000
  val CLINT_CMP_ADDR = CLINT_ADDR+0x4000
  val CLINT_TIME_ADDR = CLINT_ADDR+0xBFF8

  /**
   * Builds the memory agent and connects it to every instruction port and to the data port of
   * `dut`.
   *
   * @param dut       cluster under simulation
   * @param withStall forwarded unchanged to `addPort` for every port
   * @return the agent; callers use its `memory` (to preload images) and `reportWatchdog`
   */
  def ram(dut : VexRiscvSmpCluster, withStall : Boolean) = {
    import spinal.core.sim._
    val cpuCount = dut.cpus.size
    val ram = new BmbMemoryAgent(0x100000000l){
      case class Report(hart : Int, code : Int, data : Int){
        override def toString: String = {
          f"CPU:$hart%2d ${code}%3x -> $data%3d"
        }
      }
      // Every report received, per hart, in arrival order; dumped at the end of the simulation.
      val reports = ArrayBuffer.fill(cpuCount)(ArrayBuffer[Report]())


      // Handlers for word accesses in the 0xF window, keyed by offset inside the window.
      val writeTable = mutable.HashMap[Int, Int => Unit]()
      val readTable = mutable.HashMap[Int, () => Int]()
      def onWrite(address : Int)(body : Int => Unit) = writeTable(address) = body
      def onRead(address : Int)(body : => Int) = readTable(address) = () => body

      // Word being assembled from byte writes / word latched for byte reads.
      var writeData = 0
      var readData = 0
      // Incremented on every report; the test bench resets it and checks that it moved.
      var reportWatchdog = 0
      val cpuEnd = Array.fill(cpuCount)(false)
      val barriers = mutable.HashMap[Int, Int]()
      var consistancyCounter = 0
      var consistancyLast = 0
      var consistancyA = 0
      var consistancyB = 0
      var consistancyAB = 0
      var consistancyNone = 0

      onSimEnd{
        for((list, hart) <- reports.zipWithIndex){
          println(f"\n\n**** CPU $hart%2d ****")
          for((report, reportId) <- list.zipWithIndex){
            println(f"  $reportId%3d : ${report.code}%3x -> ${report.data}%3d")
          }
        }

        println(s"consistancy NONE:$consistancyNone A:$consistancyA B:$consistancyB AB:$consistancyAB")
      }

      /**
       * Plain memory write unless the address is in the 0xF window. Inside the window the four
       * bytes of a word are accumulated into `writeData`, and the word is acted upon when the
       * byte with index 3 is written.
       */
      override def setByte(address: Long, value: Byte): Unit = {
        if((address & 0xF0000000l) != 0xF0000000l) return super.setByte(address, value)
        val byteId = address & 3
        val mask = 0xFF << (byteId*8)
        writeData = (writeData & ~mask) | ((value.toInt << (byteId*8)) & mask)
        if(byteId != 3) return
        // Word-aligned offset inside the window.
        val offset = (address & ~0xF0000000l)-3
        //        println(s"W[0x${offset.toHexString}] = $writeData @${simTime()}")
        offset match {
          // Report window: bits 23..16 of the offset are the hart, bits 15..0 the report code.
          case _ if offset >= 0x8000000 && offset < 0x9000000 => {
            val report = Report(
              hart = ((offset & 0xFF0000) >> 16).toInt,
              code = (offset & 0x00FFFF).toInt,
              data = writeData
            )
//            println(report)
            reports(report.hart) += report
            reportWatchdog += 1
            import report._
            code match {
              case REPORT_THREAD_ID => assert(data == hart)
              case REPORT_THREAD_COUNT => assert(data == cpuCount)
              // The simulation succeeds once every hart has reported its end exactly once.
              case REPORT_END => assert(data == 0); assert(cpuEnd(hart) == false); cpuEnd(hart) = true; if(!cpuEnd.exists(_ == false)) simSuccess()
              case REPORT_BARRIER_START => {
                val counter = barriers.getOrElse(data, 0)
                assert(counter < cpuCount)
                barriers(data) = counter + 1
              }
              case REPORT_BARRIER_END => {
                val counter = barriers.getOrElse(data, 0)
                assert(counter == cpuCount)
              }
              // Values arrive in pairs; the second report of a pair is classified against the first.
              case REPORT_CONSISTENCY_VALUES => consistancyCounter match {
                case 0 => {
                  consistancyCounter = 1
                  consistancyLast = data
                }
                case 1 => {
                  consistancyCounter = 0
                  (data, consistancyLast) match {
                    case (666, 0) => consistancyA += 1
                    case (0, 666) => consistancyB += 1
                    case (666, 666) => consistancyAB += 1
                    case (0,0) => consistancyNone += 1; simFailure("Consistancy issue :(")
                  }
                }
              }
            }
          }
          case _ => writeTable.get(offset.toInt) match {
            case Some(x) => x(writeData)
            case _ => simFailure(f"\n\nWrite at ${address-3}%8x with $writeData%8x")
          }
        }
      }

      /**
       * Plain memory read unless the address is in the 0xF window. Inside the window the handler
       * runs when byte 0 of a word is read, and the latched word then serves the other bytes.
       */
      override def getByte(address: Long): Byte = {
        if((address & 0xF0000000l) != 0xF0000000l) return super.getByte(address)
        val byteId = address & 3
        val offset = (address & ~0xF0000000l)
        if(byteId == 0) readData = readTable.get(offset.toInt) match {
          case Some(x) => x()
          case _ => simFailure(f"\n\nRead at $address%8x")
        }
        (readData >> (byteId*8)).toByte
      }

      // Named class rather than `new { ... }`: an anonymous structural type would make every
      // access to cmp/time from the handlers below a reflective call.
      final class ClintModel {
        val cmp = Array.fill(cpuCount)(0l)
        var time = 0l
        // Every 100 simulation time units: advance the timer by 10 and refresh the per-hart
        // timer interrupt lines (asserted while cmp < time).
        periodicaly(100){
          time += 10
          var timerInterrupts = 0l
          for(i <- 0 until cpuCount){
            if(cmp(i) < time) timerInterrupts |= 1l << i
          }
          dut.io.timerInterrupts #= timerInterrupts
        }

//        delayed(200*1000000){
//          dut.io.softwareInterrupts #= 0xE
//          enableSimWave()
//          println("force IPI")
//        }
      }
      val clint = new ClintModel

      onWrite(PUTC)(data => print(data.toChar))
      onRead(GETC)( if(System.in.available() != 0) System.in.read() else -1)

      dut.io.softwareInterrupts #= 0
      dut.io.timerInterrupts #= 0
      dut.io.externalInterrupts #= 0
      dut.io.externalSupervisorInterrupts #= 0
      onRead(CLINT_TIME_ADDR)(clint.time.toInt)
      onRead(CLINT_TIME_ADDR+4)((clint.time >> 32).toInt)
      for(hartId <- 0 until cpuCount){
        // Writing 1 raises the hart's software interrupt, any other value clears it.
        onWrite(CLINT_IPI_ADDR + hartId*4) {data =>
          val mask = 1l << hartId
          val value = (dut.io.softwareInterrupts.toLong & ~mask) | (if(data == 1) mask else 0)
          dut.io.softwareInterrupts #= value
        }
//        onRead(CLINT_CMP_ADDR + hartId*8)(clint.cmp(hartId).toInt)
//        onRead(CLINT_CMP_ADDR + hartId*8+4)((clint.cmp(hartId) >> 32).toInt)
        // Low word: `data` must be zero-extended. A plain `| data` widens the Int with sign
        // extension, so a low word with bit 31 set would overwrite the high word with ones.
        onWrite(CLINT_CMP_ADDR + hartId*8){data => clint.cmp(hartId) = (clint.cmp(hartId) & 0xFFFFFFFF00000000l) | (data.toLong & 0xFFFFFFFFl)}
        onWrite(CLINT_CMP_ADDR + hartId*8+4){data => clint.cmp(hartId) = (clint.cmp(hartId) & 0x00000000FFFFFFFFl) | (data.toLong << 32)}
      }



    }
    dut.io.iMems.foreach(ram.addPort(_,0,dut.clockDomain,true, withStall))
    ram.addPort(dut.io.dMem,0,dut.clockDomain,true, withStall)
    ram
  }

  /** Starts both clock domains with a period of 10 and parks the debug bus command as invalid. */
  def init(dut : VexRiscvSmpCluster): Unit ={
    import spinal.core.sim._
    dut.clockDomain.forkStimulus(10)
    dut.debugClockDomain.forkStimulus(10)
    dut.io.debugBus.cmd.valid #= false
  }
}

/**
 * Bare-metal SMP regression: a 4-CPU cluster running `smp.bin`. The run ends when the memory
 * agent calls simSuccess / simFailure, or fails when no report arrives during a watchdog period.
 */
object VexRiscvSmpClusterTest extends App{
  import spinal.core.sim._

  val simConfig = SimConfig
  simConfig.withWave
  simConfig.allOptimisation
  simConfig.addSimulatorFlag("--threads 1")

  val cpuCount = 4
  val withStall = true

  simConfig.compile(VexRiscvSmpClusterGen.vexRiscvCluster(cpuCount)).doSimUntilVoid(seed = 42){dut =>
    disableSimWave()
    SimTimeout(100000000l*10*cpuCount)
    dut.clockDomain.forkSimSpeedPrinter(1.0)
    VexRiscvSmpClusterTestInfrastructure.init(dut)
    val ram = VexRiscvSmpClusterTestInfrastructure.ram(dut, withStall)
    ram.memory.loadBin(0x80000000l, "src/test/cpp/raw/smp/build/smp.bin")
    // Watchdog: at least one report must have been written since the previous check.
    periodicaly(20000*10){
      assert(ram.reportWatchdog != 0)
      ram.reportWatchdog = 0
    }
  }
}

// echo "echo 10000 | dhrystone >> log" > test
// time sh test &
// top -b -n 1

// TODO
// MultiChannelFifo.toStream arbitration
// BmbDecoderOutOfOrder arbitration
// DataCache to bmb invalidation that are more than single line
/**
 * Boots OpenSBI + Linux images on a 2-CPU cluster in simulation and samples memory-traffic
 * statistics on every clock, appending one CSV row to `bench.csv` every 400000 samples.
 *
 * CSV columns, in order: cycles in the window, instruction bytes fetched on cache miss, data
 * bytes read, data bytes written, instruction bus requests, sequential instruction misses,
 * data writes, one hit counter per write window (4, 8, 16, 32, 64 bytes), instruction requests
 * that targeted the line right after the previous request.
 */
object VexRiscvSmpClusterOpenSbi extends App{
  import spinal.core.sim._

  val simConfig = SimConfig
  simConfig.withWave
  simConfig.allOptimisation
  simConfig.addSimulatorFlag("--threads 1")

  val cpuCount = 2
  val withStall = false

  // Elaborates the cluster and exposes each instruction cache's decode interface to the simulator.
  def gen = {
    val dut = VexRiscvSmpClusterGen.vexRiscvCluster(cpuCount, resetVector = 0x80000000l)
    dut.cpus.foreach{cpu =>
      cpu.core.children.foreach{
        case cache : InstructionCache => cache.io.cpu.decode.simPublic()
        case _ =>
      }
    }
    dut
  }

  simConfig.workspaceName("rawr_4c").compile(gen).doSimUntilVoid(seed = 42){dut =>
//    dut.clockDomain.forkSimSpeedPrinter(1.0)
    VexRiscvSmpClusterTestInfrastructure.init(dut)
    val ram = VexRiscvSmpClusterTestInfrastructure.ram(dut, withStall)
//    ram.memory.loadBin(0x80000000l, "../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_payload.bin")

//    ram.memory.loadBin(0x40F00000l, "/media/data/open/litex_smp/litex_vexriscv_smp/images/fw_jump.bin")
//    ram.memory.loadBin(0x40000000l, "/media/data/open/litex_smp/litex_vexriscv_smp/images/Image")
//    ram.memory.loadBin(0x40EF0000l, "/media/data/open/litex_smp/litex_vexriscv_smp/images/dtb")
//    ram.memory.loadBin(0x41000000l, "/media/data/open/litex_smp/litex_vexriscv_smp/images/rootfs.cpio")

    ram.memory.loadBin(0x80000000l, "../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_jump.bin")
    ram.memory.loadBin(0xC0000000l, "../buildroot/output/images/Image")
    ram.memory.loadBin(0xC1000000l, "../buildroot/output/images/dtb")
    ram.memory.loadBin(0xC2000000l, "../buildroot/output/images/rootfs.cpio")

    var iMemReadBytes, dMemReadBytes, dMemWriteBytes, iMemSequencial,iMemRequests, iMemPrefetchHit = 0l
    var reportTimer = 0
    var reportCycle = 0
    // Histogram of address deltas between consecutive instruction requests of a CPU. It is not
    // written to the CSV; kept for inspection from a debugger.
    val iMemFetchDelta = mutable.HashMap[Long, Long]()
    var iMemFetchDeltaSorted : Seq[(Long, Long)] = null
    var dMemWrites = 0l

    // Named classes instead of `new { ... }`: the sampling callback below runs on every clock,
    // and members of an anonymous structural type would be accessed through reflection.

    /** Counts data writes that hit the same `bytes`-aligned window as the previous data write. */
    final class WriteWindow(bytes : Int){
      var counter = 0l
      // -1 means "no previous write", the same encoding the read path uses to reset the window.
      var address = -1l
      val mask = ~((1 << log2Up(bytes))-1)
    }
    val dMemWriteCacheCtx = List(4,8,16,32,64).map(new WriteWindow(_))

    /** Per-CPU instruction-side bookkeeping. */
    final class IMemProbe(cpuId : Int){
      var sequencialPrediction = 0l
      val cache = dut.cpus(cpuId).core.children
        .collectFirst{ case c : InstructionCache => c.io.cpu.decode }
        .getOrElse(throw new IllegalStateException(s"CPU $cpuId has no InstructionCache child"))
      var lastAddress = 0l
    }
    val iMemCtx = Array.tabulate(cpuCount)(new IMemProbe(_))

    import java.io._
    val csv = new PrintWriter(new File("bench.csv" ))
    // Release the file handle when the simulation terminates.
    onSimEnd(csv.close())

    dut.clockDomain.onSamplings{
      dut.io.time #= simTime()/10


      for(i <- 0 until cpuCount; iMem = dut.io.iMems(i); ctx = iMemCtx(i)){
        // Decode-stage view of the instruction cache.
        if(ctx.cache.isValid.toBoolean && !ctx.cache.mmuRefilling.toBoolean && !ctx.cache.mmuException.toBoolean){
          val address = ctx.cache.physicalAddress.toLong
          val length = ctx.cache.p.bytePerLine.toLong
          val mask = ~(length-1)
          if(ctx.cache.cacheMiss.toBoolean) {
            iMemReadBytes += length
            // Miss on the line that follows the last non-stuck decode address.
            if ((address & mask) == (ctx.sequencialPrediction & mask)) {
              iMemSequencial += 1
            }
          }
          if(!ctx.cache.isStuck.toBoolean) {
            ctx.sequencialPrediction = address + length
          }
        }

        // Accepted request on the CPU's instruction memory port.
        if(iMem.cmd.valid.toBoolean && iMem.cmd.ready.toBoolean){
          val address = iMem.cmd.address.toLong
          iMemRequests += 1
          if(iMemCtx(i).lastAddress + ctx.cache.p.bytePerLine == address){
            iMemPrefetchHit += 1
          }
          val delta = address-iMemCtx(i).lastAddress
          iMemFetchDelta(delta) = iMemFetchDelta.getOrElse(delta, 0l) + 1l
          if(iMemRequests % 1000 == 999) iMemFetchDeltaSorted = iMemFetchDelta.toSeq.sortBy(_._1)
          iMemCtx(i).lastAddress = address
        }
      }
      // Accepted request on the shared data memory port.
      if(dut.io.dMem.cmd.valid.toBoolean && dut.io.dMem.cmd.ready.toBoolean){
        if(dut.io.dMem.cmd.opcode.toInt == Bmb.Cmd.Opcode.WRITE){
          dMemWriteBytes += dut.io.dMem.cmd.length.toInt+1
          val address = dut.io.dMem.cmd.address.toLong
          dMemWrites += 1
          for(ctx <- dMemWriteCacheCtx){
            if((address & ctx.mask) == (ctx.address & ctx.mask)){
              ctx.counter += 1
            } else {
              ctx.address = address
            }
          }
        }else {
          dMemReadBytes += dut.io.dMem.cmd.length.toInt+1
          // Any read breaks the run of consecutive writes.
          for(ctx <- dMemWriteCacheCtx) ctx.address = -1
        }
      }
      reportTimer = reportTimer + 1
      reportCycle = reportCycle + 1
      if(reportTimer == 400000){
        reportTimer = 0
//        println(f"\n** c=${reportCycle} ir=${iMemReadBytes*1e-6}%5.2f dr=${dMemReadBytes*1e-6}%5.2f dw=${dMemWriteBytes*1e-6}%5.2f **\n")


        csv.write(s"$reportCycle,$iMemReadBytes,$dMemReadBytes,$dMemWriteBytes,$iMemRequests,$iMemSequencial,$dMemWrites,${dMemWriteCacheCtx.map(_.counter).mkString(",")},$iMemPrefetchHit\n")
        csv.flush()
        // Start a new measurement window.
        reportCycle = 0
        iMemReadBytes = 0
        dMemReadBytes = 0
        dMemWriteBytes = 0
        iMemRequests = 0
        iMemSequencial = 0
        dMemWrites = 0
        iMemPrefetchHit = 0
        for(ctx <- dMemWriteCacheCtx) ctx.counter = 0
      }
    }

    // Wave capture is duty-cycled: off for 100000*10 time units, on for 100*10, forever.
    fork{
      while(true) {
        disableSimWave()
        sleep(100000 * 10)
        enableSimWave()
        sleep(  100 * 10)
      }
    }
  }
}
/**
 * Parameters of the LiteX flavoured cluster.
 *
 * @param cluster         description of the CPU cluster itself
 * @param liteDram        shape of the LiteDRAM native ports
 * @param liteDramMapping address range routed to LiteDRAM; everything else goes to the
 *                        peripheral Wishbone bus (see the decoders' DefaultMapping entry)
 */
case class VexRiscvLitexSmpClusterParameter( cluster : VexRiscvSmpClusterParameter,
                                             liteDram : LiteDramNativeParameter,
                                             liteDramMapping : AddressMapping)

//addAttribute("""mark_debug = "true"""")
/**
 * VexRiscvSmpCluster wrapped with one LiteDRAM data port, one shared LiteDRAM instruction port,
 * a Wishbone peripheral master, a CLINT and a PLIC (both configured through Wishbone slaves) and
 * a JTAG-instruction driven system debugger.
 *
 * @param p                see [[VexRiscvLitexSmpClusterParameter]]
 * @param debugClockDomain clock domain of the debugger logic, also handed to the inner cluster
 * @param jtagClockDomain  clock domain given to the JTAG bridge
 */
case class VexRiscvLitexSmpCluster(p : VexRiscvLitexSmpClusterParameter,
                                   debugClockDomain : ClockDomain,
                                   jtagClockDomain : ClockDomain) extends Component{

  val peripheralWishboneConfig = WishboneConfig(
    addressWidth = 30,
    dataWidth = 32,
    selWidth = 4,
    useERR = true,
    useBTE = true,
    useCTI = true
  )

  val io = new Bundle {
    val dMem = master(LiteDramNative(p.liteDram))
    val iMem = master(LiteDramNative(p.liteDram))
    val peripheral = master(Wishbone(peripheralWishboneConfig))
    val clint = slave(Wishbone(Clint.getWisboneConfig()))
    val plic = slave(Wishbone(WishboneConfig(addressWidth = 20, dataWidth = 32)))
    val interrupts = in Bits(32 bits)
    val jtagInstruction = slave(JtagTapInstructionCtrl())
    val debugReset = out Bool()
  }
  val cpuCount = p.cluster.cpuConfigs.size
  val clint = Clint(cpuCount)
  clint.driveFrom(WishboneSlaveFactory(io.clint))

  // Timer / software interrupts and the time value all come from the CLINT.
  val cluster = VexRiscvSmpCluster(p.cluster, debugClockDomain)
  cluster.io.debugReset <> io.debugReset
  cluster.io.timerInterrupts <> B(clint.harts.map(_.timerInterrupt))
  cluster.io.softwareInterrupts <> B(clint.harts.map(_.softwareInterrupt))
  cluster.io.time := clint.time

  // JTAG instruction -> bridge -> system debugger -> cluster debug bus.
  val debug = debugClockDomain on new Area{
    val jtagConfig = SystemDebuggerConfig()
    val jtagBridge = new JtagBridgeNoTap(
      c = jtagConfig,
      jtagClockDomain = jtagClockDomain
    )
    jtagBridge.io.ctrl << io.jtagInstruction

    val debugger = new SystemDebugger(jtagConfig)
    debugger.io.remote <> jtagBridge.io.remote

    cluster.io.debugBus << debugger.io.mem.toBmb()
  }

  // Data side: output 0 = default mapping (peripherals), output 1 = LiteDRAM window.
  val dBusDecoder = BmbDecoderOutOfOrder(
    p            = cluster.io.dMem.p,
    mappings     = Seq(DefaultMapping, p.liteDramMapping),
    capabilities = Seq(cluster.io.dMem.p, cluster.io.dMem.p),
    pendingRspTransactionMax = 32
  )
//  val dBusDecoder = BmbDecoderOut(
//    p            = cluster.io.dMem.p,
//    mappings     = Seq(DefaultMapping, p.liteDramMapping),
//    capabilities = Seq(cluster.io.dMem.p, cluster.io.dMem.p),
//    pendingMax = 31
//  )
  dBusDecoder.io.input << cluster.io.dMem.pipelined(cmdValid = true, cmdReady = true, rspValid = true)
  // Kept as a val: the simulation harness reaches into dMemBridge.unburstified.
  val dMemBridge = io.dMem.fromBmb(dBusDecoder.io.outputs(1), wdataFifoSize = 32, rdataFifoSize = 32)

  // Instruction side: the per-CPU ports are first merged by an arbiter, then decoded the same
  // way as the data side.
  val iBusArbiterParameter = cluster.iBusParameter.copy(sourceWidth = log2Up(cpuCount))
  val iBusArbiter = BmbArbiter(
    p = iBusArbiterParameter,
    portCount = cpuCount,
    lowerFirstPriority = false
  )

  (iBusArbiter.io.inputs, cluster.io.iMems).zipped.foreach(_ << _.pipelined(cmdHalfRate = true, rspValid = true))

  val iBusDecoder = BmbDecoder(
    p            = iBusArbiter.io.output.p,
    mappings     = Seq(DefaultMapping, p.liteDramMapping),
    capabilities = Seq(iBusArbiterParameter, iBusArbiterParameter),
    pendingMax   = 15
  )
  iBusDecoder.io.input << iBusArbiter.io.output.pipelined(cmdValid = true)

  // NOTE(review): this bundle is not referenced anywhere else in this component; the bridge
  // below is attached to io.iMem. Looks like a leftover - confirm before removing.
  val iMem = LiteDramNative(p.liteDram)
  io.iMem.fromBmb(iBusDecoder.io.outputs(1), wdataFifoSize = 0, rdataFifoSize = 32)


  // Non-DRAM traffic of both sides is narrowed to 32 bits before reaching the peripheral bus.
  val iBusDecoderToPeripheral = iBusDecoder.io.outputs(0).resize(dataWidth = 32).pipelined(cmdHalfRate = true, rspValid = true)
  val dBusDecoderToPeripheral = dBusDecoder.io.outputs(0).resize(dataWidth = 32).pipelined(cmdHalfRate = true, rspValid = true)

  val peripheralAccessLength = Math.max(iBusDecoder.io.outputs(0).p.lengthWidth, dBusDecoder.io.outputs(0).p.lengthWidth)
  val peripheralArbiter = BmbArbiter(
    p = dBusDecoder.io.outputs(0).p.copy(
      // Widest source of the two inputs plus one bit to tell the two arbiter ports apart.
      sourceWidth = List(iBusDecoderToPeripheral, dBusDecoderToPeripheral).map(_.p.sourceWidth).max + 1,
      contextWidth = List(iBusDecoderToPeripheral, dBusDecoderToPeripheral).map(_.p.contextWidth).max,
      lengthWidth = peripheralAccessLength,
      dataWidth = 32
    ),
    portCount = 2,
    lowerFirstPriority = true
  )
  peripheralArbiter.io.inputs(0) << iBusDecoderToPeripheral
  peripheralArbiter.io.inputs(1) << dBusDecoderToPeripheral

  val peripheralWishbone = peripheralArbiter.io.output.pipelined(cmdValid = true).toWishbone()
  io.peripheral << peripheralWishbone

  val plic = new Area{
    val priorityWidth = 2

    // One gateway per interrupt input 1..31; io.interrupts(0) is not connected to any gateway.
    val gateways = for(i <- 1 until 32) yield PlicGatewayActiveHigh(
      source = io.interrupts(i),
      id = i,
      priorityWidth = priorityWidth
    )

    val bus = WishboneSlaveFactory(io.plic)

    // Two PLIC targets per CPU, driving its machine and supervisor external interrupt inputs.
    val targets = for(i <- 0 until cpuCount) yield new Area{
      val machine = PlicTarget(
        gateways = gateways,
        priorityWidth = priorityWidth
      )
      val supervisor = PlicTarget(
        gateways = gateways,
        priorityWidth = priorityWidth
      )

      cluster.io.externalInterrupts(i) := machine.iep
      cluster.io.externalSupervisorInterrupts(i) := supervisor.iep
    }

    // Targets are registered as machine(0), supervisor(0), machine(1), supervisor(1), ...
    val bridge = PlicMapper(bus, PlicMapping.sifive)(
      gateways = gateways,
      targets = targets.flatMap(t => List(t.machine, t.supervisor))
    )
  }
}
/**
 * Simulation harness booting OpenSBI + Linux images on a 2-CPU VexRiscvLitexSmpCluster.
 *
 * The top level is reworked so that the CLINT Wishbone slave is driven from the peripheral
 * Wishbone master inside the design, and the simulation itself services two console addresses
 * on the peripheral bus.
 */
object VexRiscvLitexSmpClusterOpenSbi extends App{
  import spinal.core.sim._

  val simConfig = SimConfig
  simConfig.withWave
  simConfig.allOptimisation

  val cpuCount = 2

  // A def, so every call builds fresh CPU configs.
  def parameter = VexRiscvLitexSmpClusterParameter(
    cluster = VexRiscvSmpClusterParameter(
      cpuConfigs = List.tabulate(cpuCount) { hartId =>
        vexRiscvConfig(
          hartId = hartId,
          ioRange = address => address(31 downto 28) === 0xF,
          resetVector = 0x80000000l
        )
      }
    ),
    liteDram = LiteDramNativeParameter(addressWidth = 32, dataWidth = 128),
    liteDramMapping = SizeMapping(0x80000000l, 0x70000000l)
  )

  def dutGen = {
    val top = VexRiscvLitexSmpCluster(
      p = parameter,
      debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")),
      jtagClockDomain = ClockDomain.external("jtag", withReset = false)
    )
    top.rework{
      // Turn the two Wishbone ports into internal signals so they can be wired together here.
      top.io.clint.setAsDirectionLess.allowDirectionLessIo
      top.io.peripheral.setAsDirectionLess.allowDirectionLessIo.simPublic()

      // Byte addresses 0xF0010000 (inclusive) to 0xF0020000 (exclusive) select the CLINT; ADR is a word address.
      val hit = (top.io.peripheral.ADR <<2 >= 0xF0010000l && top.io.peripheral.ADR<<2 < 0xF0020000l)
      top.io.clint.CYC := top.io.peripheral.CYC && hit
      top.io.clint.STB := top.io.peripheral.STB
      top.io.clint.WE := top.io.peripheral.WE
      top.io.clint.ADR := top.io.peripheral.ADR.resized
      top.io.clint.DAT_MOSI := top.io.peripheral.DAT_MOSI
      top.io.peripheral.DAT_MISO := top.io.clint.DAT_MISO
      // Accesses outside the CLINT window are acknowledged immediately.
      top.io.peripheral.ACK := top.io.peripheral.CYC && (!hit || top.io.clint.ACK)
      top.io.peripheral.ERR := False

      top.dMemBridge.unburstified.cmd.simPublic()
    }
    top
  }
  simConfig.compile(dutGen).doSimUntilVoid(seed = 42){dut =>
    dut.clockDomain.forkStimulus(10)
    // Pulse the debug reset once at start-up.
    fork {
      dut.debugClockDomain.resetSim #= false
      sleep (0)
      dut.debugClockDomain.resetSim #= true
      sleep (10)
      dut.debugClockDomain.resetSim #= false
    }


    // One memory image shared by the instruction and the data LiteDRAM ports.
    val ram = SparseMemory()
    ram.loadBin(0x80000000l, "../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_jump.bin")
    ram.loadBin(0xC0000000l, "../buildroot/output/images/Image")
    ram.loadBin(0xC1000000l, "../buildroot/output/images/dtb")
    ram.loadBin(0xC2000000l, "../buildroot/output/images/rootfs.cpio")


    dut.io.iMem.simSlave(ram, dut.clockDomain)
    dut.io.dMem.simSlave(ram, dut.clockDomain, dut.dMemBridge.unburstified)

    dut.io.interrupts #= 0

    // Console: 0xF0000000 prints the written character, 0xF0000004 returns a pending stdin
    // byte or 0xFFFFFFFF when none is available.
    // NOTE(review): the match does not look at WE/STB, so any cycle with CYC high at these
    // addresses triggers the action - confirm this is intended.
    dut.clockDomain.onFallingEdges{
      if(dut.io.peripheral.CYC.toBoolean){
        (dut.io.peripheral.ADR.toLong << 2) match {
          case 0xF0000000l => print(dut.io.peripheral.DAT_MOSI.toLong.toChar)
          case 0xF0000004l => dut.io.peripheral.DAT_MISO #= (if(System.in.available() != 0) System.in.read() else 0xFFFFFFFFl)
          case _ =>
        }
//        println(f"${dut.io.peripheral.ADR.toLong}%x")
      }
    }

//    fork{
//      disableSimWave()
//      val atMs = 3790
//      val durationMs = 5
//      sleep(atMs*1000000l)
//      enableSimWave()
//      println("** enableSimWave **")
//      sleep(durationMs*1000000l)
//      println("** disableSimWave **")
//      while(true) {
//        disableSimWave()
//        sleep(100000 * 10)
//        enableSimWave()
//        sleep(  100 * 10)
//      }
//      //      simSuccess()
//    }

    // Wave capture is duty-cycled: off for 100000*10 time units, on for 100*10, forever.
    fork{
      while(true) {
        disableSimWave()
        sleep(100000 * 10)
        enableSimWave()
        sleep(  100 * 10)
      }
    }
  }
}
b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexMpCluster.scala @@ -0,0 +1,369 @@ +package vexriscv.demo.smp + +import spinal.core._ +import spinal.lib.bus.bmb._ +import spinal.lib.bus.wishbone.{Wishbone, WishboneConfig, WishboneSlaveFactory} +import spinal.lib.com.jtag.{Jtag, JtagTap, JtagTapInstructionCtrl} +import spinal.lib._ +import spinal.lib.blackbox.xilinx.s7.BSCANE2 +import spinal.lib.bus.bmb.sim.{BmbMemoryMultiPort, BmbMemoryTester} +import spinal.lib.bus.misc.{AddressMapping, DefaultMapping, SizeMapping} +import spinal.lib.com.jtag.sim.JtagTcp +import spinal.lib.com.jtag.xilinx.Bscane2BmbMaster +import spinal.lib.eda.bench.Bench +import spinal.lib.misc.Clint +import spinal.lib.misc.plic.{PlicGatewayActiveHigh, PlicMapper, PlicMapping, PlicTarget} +import spinal.lib.sim.{SimData, SparseMemory, StreamDriver, StreamMonitor, StreamReadyRandomizer} +import spinal.lib.system.debugger.{JtagBridgeNoTap, SystemDebugger, SystemDebuggerConfig} +import sun.jvm.hotspot.oops.DataLayout +import vexriscv.demo.smp.VexRiscvLitexSmpMpClusterOpenSbi.{cpuCount, parameter} +import vexriscv.demo.smp.VexRiscvSmpClusterGen.vexRiscvConfig +import vexriscv.{VexRiscv, VexRiscvConfig} +import vexriscv.plugin.{CsrPlugin, DBusCachedPlugin, DebugPlugin, IBusCachedPlugin} + +import scala.collection.mutable +import scala.util.Random + + +case class VexRiscvLitexSmpMpClusterParameter( cluster : VexRiscvSmpClusterParameter, + liteDram : LiteDramNativeParameter, + liteDramMapping : AddressMapping) + +//addAttribute("""mark_debug = "true"""") +class VexRiscvLitexSmpMpCluster(val p : VexRiscvLitexSmpMpClusterParameter, + val debugClockDomain : ClockDomain, + val jtagClockDomain : ClockDomain) extends Component{ + + val peripheralWishboneConfig = WishboneConfig( + addressWidth = 30, + dataWidth = 32, + selWidth = 4, + useERR = true, + useBTE = true, + useCTI = true + ) + + val cpuCount = p.cluster.cpuConfigs.size + + val io = new Bundle { + val dMem = Vec(master(LiteDramNative(p.liteDram)), 
cpuCount) + val iMem = Vec(master(LiteDramNative(p.liteDram)), cpuCount) + val peripheral = master(Wishbone(peripheralWishboneConfig)) + val clint = slave(Wishbone(Clint.getWisboneConfig())) + val plic = slave(Wishbone(WishboneConfig(addressWidth = 20, dataWidth = 32))) + val interrupts = in Bits(32 bits) + val jtagInstruction = slave(JtagTapInstructionCtrl()) + val debugReset = out Bool() + } + val clint = Clint(cpuCount) + clint.driveFrom(WishboneSlaveFactory(io.clint)) + + val cluster = VexRiscvSmpCluster(p.cluster, debugClockDomain) + cluster.io.debugReset <> io.debugReset + cluster.io.timerInterrupts <> B(clint.harts.map(_.timerInterrupt)) + cluster.io.softwareInterrupts <> B(clint.harts.map(_.softwareInterrupt)) + cluster.io.time := clint.time + + val debug = debugClockDomain on new Area{ + val jtagConfig = SystemDebuggerConfig() + + val jtagBridge = new JtagBridgeNoTap(jtagConfig, jtagClockDomain) + jtagBridge.io.ctrl << io.jtagInstruction + + val debugger = new SystemDebugger(jtagConfig) + debugger.io.remote <> jtagBridge.io.remote + + cluster.io.debugBus << debugger.io.mem.toBmb() + +// io.jtagInstruction.allowDirectionLessIo.setAsDirectionLess +// val bridge = Bscane2BmbMaster(1) +// cluster.io.debugBus << bridge.io.bmb + + +// val bscane2 = BSCANE2(usedId) +// val jtagClockDomain = ClockDomain(bscane2.TCK) +// +// val jtagBridge = new JtagBridgeNoTap(jtagConfig, jtagClockDomain) +// jtagBridge.io.ctrl << bscane2.toJtagTapInstructionCtrl() +// +// val debugger = new SystemDebugger(jtagConfig) +// debugger.io.remote <> jtagBridge.io.remote +// +// io.bmb << debugger.io.mem.toBmb() + } + + val dBusDecoder = BmbDecoderOutOfOrder( + p = cluster.io.dMem.p, + mappings = Seq(DefaultMapping, p.liteDramMapping), + capabilities = Seq(cluster.io.dMem.p, cluster.io.dMem.p), + pendingRspTransactionMax = 32 + ) +// val dBusDecoder = BmbDecoderOut( +// p = cluster.io.dMem.p, +// mappings = Seq(DefaultMapping, p.liteDramMapping), +// capabilities = Seq(cluster.io.dMem.p, 
cluster.io.dMem.p), +// pendingMax = 31 +// ) + dBusDecoder.io.input << cluster.io.dMem.pipelined(cmdValid = true, cmdReady = true, rspValid = true) + + + val perIBus = for(id <- 0 until cpuCount) yield new Area{ + val decoder = BmbDecoder( + p = cluster.io.iMems(id).p, + mappings = Seq(DefaultMapping, p.liteDramMapping), + capabilities = Seq(cluster.io.iMems(id).p,cluster.io.iMems(id).p), + pendingMax = 15 + ) + + decoder.io.input << cluster.io.iMems(id) + io.iMem(id).fromBmb(decoder.io.outputs(1).pipelined(cmdHalfRate = true), wdataFifoSize = 0, rdataFifoSize = 32) + val toPeripheral = decoder.io.outputs(0).resize(dataWidth = 32).pipelined(cmdHalfRate = true, rspValid = true) + } + + val dBusDecoderToPeripheral = dBusDecoder.io.outputs(0).resize(dataWidth = 32).pipelined(cmdHalfRate = true, rspValid = true) + + val peripheralAccessLength = Math.max(perIBus(0).toPeripheral.p.lengthWidth, dBusDecoder.io.outputs(0).p.lengthWidth) + val peripheralArbiter = BmbArbiter( + p = dBusDecoder.io.outputs(0).p.copy( + sourceWidth = List(perIBus(0).toPeripheral, dBusDecoderToPeripheral).map(_.p.sourceWidth).max + log2Up(cpuCount + 1), + contextWidth = List(perIBus(0).toPeripheral, dBusDecoderToPeripheral).map(_.p.contextWidth).max, + lengthWidth = peripheralAccessLength, + dataWidth = 32 + ), + portCount = cpuCount+1, + lowerFirstPriority = true + ) + + for(id <- 0 until cpuCount){ + peripheralArbiter.io.inputs(id) << perIBus(id).toPeripheral + } + peripheralArbiter.io.inputs(cpuCount) << dBusDecoderToPeripheral + + val peripheralWishbone = peripheralArbiter.io.output.pipelined(cmdValid = true).toWishbone() + io.peripheral << peripheralWishbone + + + val dBusDemux = BmbSourceDecoder(dBusDecoder.io.outputs(1).p) + dBusDemux.io.input << dBusDecoder.io.outputs(1).pipelined(cmdValid = true, cmdReady = true,rspValid = true) + val dMemBridge = for(id <- 0 until cpuCount) yield { + io.dMem(id).fromBmb(dBusDemux.io.outputs(id), wdataFifoSize = 32, rdataFifoSize = 32) + } + + + val 
plic = new Area{ + val priorityWidth = 2 + + val gateways = for(i <- 1 until 32) yield PlicGatewayActiveHigh( + source = io.interrupts(i), + id = i, + priorityWidth = priorityWidth + ) + + val bus = WishboneSlaveFactory(io.plic) + + val targets = for(i <- 0 until cpuCount) yield new Area{ + val machine = PlicTarget( + gateways = gateways, + priorityWidth = priorityWidth + ) + val supervisor = PlicTarget( + gateways = gateways, + priorityWidth = priorityWidth + ) + + cluster.io.externalInterrupts(i) := machine.iep + cluster.io.externalSupervisorInterrupts(i) := supervisor.iep + } + + val bridge = PlicMapper(bus, PlicMapping.sifive)( + gateways = gateways, + targets = targets.flatMap(t => List(t.machine, t.supervisor)) + ) + } +// +// io.dMem.foreach(_.cmd.valid.addAttribute("""mark_debug = "true"""")) +// io.dMem.foreach(_.cmd.ready.addAttribute("""mark_debug = "true"""")) +// io.iMem.foreach(_.cmd.valid.addAttribute("""mark_debug = "true"""")) +// io.iMem.foreach(_.cmd.ready.addAttribute("""mark_debug = "true"""")) +// +// cluster.io.dMem.cmd.valid.addAttribute("""mark_debug = "true"""") +// cluster.io.dMem.cmd.ready.addAttribute("""mark_debug = "true"""") +// cluster.io.dMem.rsp.valid.addAttribute("""mark_debug = "true"""") +// cluster.io.dMem.rsp.ready.addAttribute("""mark_debug = "true"""") +} + +object VexRiscvLitexSmpMpClusterGen extends App { + for(cpuCount <- List(1,2,4,8)) { + def parameter = VexRiscvLitexSmpMpClusterParameter( + cluster = VexRiscvSmpClusterParameter( + cpuConfigs = List.tabulate(cpuCount) { hartId => + vexRiscvConfig( + hartId = hartId, + ioRange = address => address.msb, + resetVector = 0 + ) + } + ), + liteDram = LiteDramNativeParameter(addressWidth = 32, dataWidth = 128), + liteDramMapping = SizeMapping(0x40000000l, 0x40000000l) + ) + + def dutGen = { + val toplevel = new VexRiscvLitexSmpMpCluster( + p = parameter, + debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")), + jtagClockDomain = 
ClockDomain.external("jtag", withReset = false) + ) + toplevel + } + + val genConfig = SpinalConfig().addStandardMemBlackboxing(blackboxByteEnables) + // genConfig.generateVerilog(Bench.compressIo(dutGen)) + genConfig.generateVerilog(dutGen.setDefinitionName(s"VexRiscvLitexSmpMpCluster_${cpuCount}c")) + } + +} + + +object VexRiscvLitexSmpMpClusterOpenSbi extends App{ + import spinal.core.sim._ + + val simConfig = SimConfig + simConfig.withWave + simConfig.withFstWave + simConfig.allOptimisation + + val cpuCount = 2 + + def parameter = VexRiscvLitexSmpMpClusterParameter( + cluster = VexRiscvSmpClusterParameter( + cpuConfigs = List.tabulate(cpuCount) { hartId => + vexRiscvConfig( + hartId = hartId, + ioRange = address => address(31 downto 28) === 0xF, + resetVector = 0x80000000l + ) + } + ), + liteDram = LiteDramNativeParameter(addressWidth = 32, dataWidth = 128), + liteDramMapping = SizeMapping(0x80000000l, 0x70000000l) + ) + + def dutGen = { + val top = new VexRiscvLitexSmpMpCluster( + p = parameter, + debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")), + jtagClockDomain = ClockDomain.external("jtag", withReset = false) + ){ + io.jtagInstruction.allowDirectionLessIo.setAsDirectionLess + val jtag = slave(Jtag()) + jtagClockDomain.readClockWire.setAsDirectionLess() := jtag.tck + val jtagLogic = jtagClockDomain on new Area{ + val tap = new JtagTap(jtag, 4) + val idcodeArea = tap.idcode(B"x10001FFF")(1) + val wrapper = tap.map(io.jtagInstruction, instructionId = 2) + } + } + top.rework{ + top.io.clint.setAsDirectionLess.allowDirectionLessIo + top.io.peripheral.setAsDirectionLess.allowDirectionLessIo.simPublic() + + val hit = (top.io.peripheral.ADR <<2 >= 0xF0010000l && top.io.peripheral.ADR<<2 < 0xF0020000l) + top.io.clint.CYC := top.io.peripheral.CYC && hit + top.io.clint.STB := top.io.peripheral.STB + top.io.clint.WE := top.io.peripheral.WE + top.io.clint.ADR := top.io.peripheral.ADR.resized + top.io.clint.DAT_MOSI := 
top.io.peripheral.DAT_MOSI + top.io.peripheral.DAT_MISO := top.io.clint.DAT_MISO + top.io.peripheral.ACK := top.io.peripheral.CYC && (!hit || top.io.clint.ACK) + top.io.peripheral.ERR := False + +// top.dMemBridge.unburstified.cmd.simPublic() + } + top + } + simConfig.compile(dutGen).doSimUntilVoid(seed = 42){dut => + dut.clockDomain.forkStimulus(10) + fork { + dut.debugClockDomain.resetSim #= false + sleep (0) + dut.debugClockDomain.resetSim #= true + sleep (10) + dut.debugClockDomain.resetSim #= false + } + + JtagTcp(dut.jtag, 10*20) + + val ram = SparseMemory() + ram.loadBin(0x80000000l, "../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_jump.bin") + ram.loadBin(0xC0000000l, "../buildroot/output/images/Image") + ram.loadBin(0xC1000000l, "../buildroot/output/images/dtb") + ram.loadBin(0xC2000000l, "../buildroot/output/images/rootfs.cpio") + + for(id <- 0 until cpuCount) { + dut.io.iMem(id).simSlave(ram, dut.clockDomain) + dut.io.dMem(id).simSlave(ram, dut.clockDomain) + } + + dut.io.interrupts #= 0 + + +// val stdin = mutable.Queue[Byte]() +// def stdInPush(str : String) = stdin ++= str.toCharArray.map(_.toByte) +// fork{ +// sleep(4000*1000000l) +// stdInPush("root\n") +// sleep(1000*1000000l) +// stdInPush("ping localhost -i 0.01 > /dev/null &\n") +// stdInPush("ping localhost -i 0.01 > /dev/null &\n") +// stdInPush("ping localhost -i 0.01 > /dev/null &\n") +// stdInPush("ping localhost -i 0.01 > /dev/null &\n") +// sleep(500*1000000l) +// while(true){ +// sleep(500*1000000l) +// stdInPush("uptime\n") +// printf("\n** uptime **") +// } +// } + dut.clockDomain.onFallingEdges { + if (dut.io.peripheral.CYC.toBoolean) { + (dut.io.peripheral.ADR.toLong << 2) match { + case 0xF0000000l => print(dut.io.peripheral.DAT_MOSI.toLong.toChar) + case 0xF0000004l => dut.io.peripheral.DAT_MISO #= (if (System.in.available() != 0) System.in.read() else 0xFFFFFFFFl) + case _ => + // case 0xF0000004l => { + // val c = if(stdin.nonEmpty) { + // stdin.dequeue().toInt & 
0xFF + // } else { + // 0xFFFFFFFFl + // } + // dut.io.peripheral.DAT_MISO #= c + // } + // case _ => + // } + // println(f"${dut.io.peripheral.ADR.toLong}%x") + } + } + } + + fork{ + val at = 0 + val duration = 1000 + while(simTime() < at*1000000l) { + disableSimWave() + sleep(100000 * 10) + enableSimWave() + sleep( 200 * 10) + } + println("\n\n********************") + sleep(duration*1000000l) + println("********************\n\n") + while(true) { + disableSimWave() + sleep(100000 * 10) + enableSimWave() + sleep( 400 * 10) + } + } + } +} \ No newline at end of file diff --git a/src/main/scala/vexriscv/ip/DataCache.scala b/src/main/scala/vexriscv/ip/DataCache.scala index 7cbe4ba..98ba0ab 100644 --- a/src/main/scala/vexriscv/ip/DataCache.scala +++ b/src/main/scala/vexriscv/ip/DataCache.scala @@ -5,7 +5,7 @@ import spinal.core._ import spinal.lib._ import spinal.lib.bus.amba4.axi.{Axi4Config, Axi4Shared} import spinal.lib.bus.avalon.{AvalonMM, AvalonMMConfig} -import spinal.lib.bus.bmb.{Bmb, BmbParameter} +import spinal.lib.bus.bmb.{Bmb, BmbCmd, BmbParameter} import spinal.lib.bus.wishbone.{Wishbone, WishboneConfig} import spinal.lib.bus.simple._ import vexriscv.plugin.DBusSimpleBus @@ -25,13 +25,25 @@ case class DataCacheConfig(cacheSize : Int, tagSizeShift : Int = 0, //Used to force infering ram withLrSc : Boolean = false, withAmo : Boolean = false, - mergeExecuteMemory : Boolean = false){ + withExclusive : Boolean = false, + withInvalidate : Boolean = false, + pendingMax : Int = 32, + directTlbHit : Boolean = false, + mergeExecuteMemory : Boolean = false, + aggregationWidth : Int = 0){ assert(!(mergeExecuteMemory && (earlyDataMux || earlyWaysHits))) assert(!(earlyDataMux && !earlyWaysHits)) + assert(isPow2(pendingMax)) + def withWriteResponse = withExclusive def burstSize = bytePerLine*8/memDataWidth - val burstLength = bytePerLine/(memDataWidth/8) + val burstLength = bytePerLine/(cpuDataWidth/8) def catchSomething = catchUnaligned || catchIllegal || 
catchAccessError - + def withInternalAmo = withAmo && !withExclusive + def withInternalLrSc = withLrSc && !withExclusive + def withExternalLrSc = withLrSc && withExclusive + def withExternalAmo = withAmo && withExclusive + def cpuDataBytes = cpuDataWidth/8 + def memDataBytes = memDataWidth/8 def getAxi4SharedConfig() = Axi4Config( addressWidth = addressWidth, dataWidth = memDataWidth, @@ -70,14 +82,19 @@ case class DataCacheConfig(cacheSize : Int, def getBmbParameter() = BmbParameter( addressWidth = 32, - dataWidth = 32, + dataWidth = memDataWidth, lengthWidth = log2Up(this.bytePerLine), sourceWidth = 0, - contextWidth = 1, + contextWidth = (if(!withWriteResponse) 1 else 0) + (if(cpuDataWidth != memDataWidth) log2Up(memDataBytes) else 0), canRead = true, canWrite = true, alignment = BmbParameter.BurstAlignement.LENGTH, - maximumPendingTransactionPerId = Int.MaxValue + maximumPendingTransactionPerId = Int.MaxValue, + canInvalidate = withInvalidate, + canSync = withInvalidate, + canExclusive = withExclusive, + invalidateLength = log2Up(this.bytePerLine), + invalidateAlignment = BmbParameter.BurstAlignement.LENGTH ) } @@ -88,12 +105,12 @@ object DataCacheCpuExecute{ case class DataCacheCpuExecute(p : DataCacheConfig) extends Bundle with IMasterSlave{ val isValid = Bool val address = UInt(p.addressWidth bit) - // val haltIt = Bool + val haltIt = Bool val args = DataCacheCpuExecuteArgs(p) override def asMaster(): Unit = { out(isValid, args, address) - // in(haltIt) + in(haltIt) } } @@ -107,47 +124,62 @@ case class DataCacheCpuExecuteArgs(p : DataCacheConfig) extends Bundle{ val swap = Bool() val alu = Bits(3 bits) } + + val totalyConsistent = Bool() //Only for AMO/LRSC } -case class DataCacheCpuMemory(p : DataCacheConfig) extends Bundle with IMasterSlave{ +case class DataCacheCpuMemory(p : DataCacheConfig, mmu : MemoryTranslatorBusParameter) extends Bundle with IMasterSlave{ val isValid = Bool val isStuck = Bool - val isRemoved = Bool val isWrite = Bool val address = 
UInt(p.addressWidth bit) - val mmuBus = MemoryTranslatorBus() + val mmuRsp = MemoryTranslatorRsp(mmu) override def asMaster(): Unit = { - out(isValid, isStuck, isRemoved, address) + out(isValid, isStuck, address) in(isWrite) - slave(mmuBus) + out(mmuRsp) } } +case class FenceFlags() extends Bundle { + val SW,SR,SO,SI,PW,PR,PO,PI = Bool() + val FM = Bits(4 bits) + + def SL = SR || SI + def SS = SW || SO + def PL = PR || PI + def PS = PW || PO + def forceAll(): Unit ={ + List(SW,SR,SO,SI,PW,PR,PO,PI).foreach(_ := True) + } + def clearAll(): Unit ={ + List(SW,SR,SO,SI,PW,PR,PO,PI).foreach(_ := False) + } +} + case class DataCacheCpuWriteBack(p : DataCacheConfig) extends Bundle with IMasterSlave{ - val isValid = Bool - val isStuck = Bool - val isUser = Bool - val haltIt = Bool - val isWrite = Bool + val isValid = Bool() + val isStuck = Bool() + val isUser = Bool() + val haltIt = Bool() + val isWrite = Bool() val data = Bits(p.cpuDataWidth bit) val address = UInt(p.addressWidth bit) - val mmuException, unalignedAccess , accessError = Bool - val clearLrsc = ifGen(p.withLrSc) {Bool} - - // val exceptionBus = if(p.catchSomething) Flow(ExceptionCause()) else null + val mmuException, unalignedAccess, accessError = Bool() + val keepMemRspData = Bool() //Used by external AMO to avoid having an internal buffer + val fence = FenceFlags() override def asMaster(): Unit = { - out(isValid,isStuck,isUser, address) - in(haltIt, data, mmuException, unalignedAccess, accessError, isWrite) - outWithNull(clearLrsc) + out(isValid,isStuck,isUser, address, fence) + in(haltIt, data, mmuException, unalignedAccess, accessError, isWrite, keepMemRspData) } } -case class DataCacheCpuBus(p : DataCacheConfig) extends Bundle with IMasterSlave{ +case class DataCacheCpuBus(p : DataCacheConfig, mmu : MemoryTranslatorBusParameter) extends Bundle with IMasterSlave{ val execute = DataCacheCpuExecute(p) - val memory = DataCacheCpuMemory(p) + val memory = DataCacheCpuMemory(p, mmu) val writeBack = 
DataCacheCpuWriteBack(p) val redo = Bool() @@ -165,24 +197,50 @@ case class DataCacheCpuBus(p : DataCacheConfig) extends Bundle with IMasterSlave case class DataCacheMemCmd(p : DataCacheConfig) extends Bundle{ val wr = Bool + val uncached = Bool val address = UInt(p.addressWidth bit) - val data = Bits(p.memDataWidth bits) - val mask = Bits(p.memDataWidth/8 bits) + val data = Bits(p.cpuDataWidth bits) + val mask = Bits(p.cpuDataWidth/8 bits) val length = UInt(log2Up(p.burstLength) bits) + val exclusive = p.withExclusive generate Bool() val last = Bool } case class DataCacheMemRsp(p : DataCacheConfig) extends Bundle{ + val aggregated = UInt(p.aggregationWidth bits) + val last = Bool() val data = Bits(p.memDataWidth bit) val error = Bool + val exclusive = p.withExclusive generate Bool() +} +case class DataCacheInv(p : DataCacheConfig) extends Bundle{ + val enable = Bool() + val address = UInt(p.addressWidth bit) +} +case class DataCacheAck(p : DataCacheConfig) extends Bundle{ + val hit = Bool() +} + +case class DataCacheSync(p : DataCacheConfig) extends Bundle{ + val aggregated = UInt(p.aggregationWidth bits) } case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave{ val cmd = Stream (DataCacheMemCmd(p)) val rsp = Flow (DataCacheMemRsp(p)) + val inv = p.withInvalidate generate Stream(Fragment(DataCacheInv(p))) + val ack = p.withInvalidate generate Stream(Fragment(DataCacheAck(p))) + val sync = p.withInvalidate generate Stream(DataCacheSync(p)) + override def asMaster(): Unit = { master(cmd) slave(rsp) + + if(p.withInvalidate) { + slave(inv) + master(ack) + slave(sync) + } } def toAxi4Shared(stageCmd : Boolean = false, pendingWritesMax : Int = 7): Axi4Shared = { @@ -221,15 +279,7 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave axi.r.ready := True axi.b.ready := True - - //TODO remove - val axi2 = cloneOf(axi) - // axi.arw >/-> axi2.arw - // axi.w >/-> axi2.w - // axi.r <-/< axi2.r - // axi.b <-/< axi2.b - 
axi2 << axi - axi2 + axi } @@ -315,57 +365,199 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave } - def toBmb() : Bmb = { + def toBmb(syncPendingMax : Int = 16, + timeoutCycles : Int = 16) : Bmb = new Area{ + setCompositeName(DataCacheMemBus.this, "Bridge", true) val pipelinedMemoryBusConfig = p.getBmbParameter() - val bus = Bmb(pipelinedMemoryBusConfig) + val bus = Bmb(pipelinedMemoryBusConfig).setCompositeName(this,"toBmb", true) + val aggregationMax = p.memDataBytes - bus.cmd.valid := cmd.valid - bus.cmd.last := cmd.last - bus.cmd.context(0) := cmd.wr - bus.cmd.opcode := (cmd.wr ? B(Bmb.Cmd.Opcode.WRITE) | B(Bmb.Cmd.Opcode.READ)) - bus.cmd.address := cmd.address.resized - bus.cmd.data := cmd.data - bus.cmd.length := (cmd.length << 2) | 3 //TODO better sub word access - bus.cmd.mask := cmd.mask + case class Context() extends Bundle{ + val isWrite = !p.withWriteResponse generate Bool() + val rspCount = (p.cpuDataWidth != p.memDataWidth) generate UInt(log2Up(aggregationMax) bits) + } - cmd.ready := bus.cmd.ready + val withoutWriteBuffer = if(p.cpuDataWidth == p.memDataWidth) new Area { + val busCmdContext = Context() - rsp.valid := bus.rsp.valid && !bus.rsp.context(0) + bus.cmd.valid := cmd.valid + bus.cmd.last := cmd.last + bus.cmd.opcode := (cmd.wr ? 
B(Bmb.Cmd.Opcode.WRITE) | B(Bmb.Cmd.Opcode.READ)) + bus.cmd.address := cmd.address.resized + bus.cmd.data := cmd.data + bus.cmd.length := (cmd.length << 2) | 3 + bus.cmd.mask := cmd.mask + if (p.withExclusive) bus.cmd.exclusive := cmd.exclusive + if (!p.withWriteResponse) busCmdContext.isWrite := cmd.wr + bus.cmd.context := B(busCmdContext) + + cmd.ready := bus.cmd.ready + if(p.withInvalidate) sync.arbitrationFrom(bus.sync) + } + + val withWriteBuffer = if(p.cpuDataWidth != p.memDataWidth) new Area { + val buffer = new Area { + val stream = cmd.toEvent().m2sPipe() + val address = Reg(UInt(p.addressWidth bits)) + val length = Reg(UInt(pipelinedMemoryBusConfig.lengthWidth bits)) + val write = Reg(Bool) + val exclusive = Reg(Bool) + val data = Reg(Bits(p.memDataWidth bits)) + val mask = Reg(Bits(p.memDataWidth/8 bits)) init(0) + } + + val aggregationRange = log2Up(p.memDataWidth/8)-1 downto log2Up(p.cpuDataWidth/8) + val tagRange = p.addressWidth-1 downto aggregationRange.high+1 + val aggregationEnabled = Reg(Bool) + val aggregationCounter = Reg(UInt(log2Up(aggregationMax) bits)) init(0) + val aggregationCounterFull = aggregationCounter === aggregationCounter.maxValue + val timer = Reg(UInt(log2Up(timeoutCycles)+1 bits)) init(0) + val timerFull = timer.msb + val hit = cmd.address(tagRange) === buffer.address(tagRange) + val canAggregate = cmd.valid && cmd.wr && !cmd.uncached && !cmd.exclusive && !timerFull && !aggregationCounterFull && (!buffer.stream.valid || aggregationEnabled && hit) + val doFlush = cmd.valid && !canAggregate || timerFull || aggregationCounterFull || !aggregationEnabled +// val canAggregate = False +// val doFlush = True + val busCmdContext = Context() + val halt = False + + when(cmd.fire){ + aggregationCounter := aggregationCounter + 1 + } + when(buffer.stream.valid && !timerFull){ + timer := timer + 1 + } + when(bus.cmd.fire || !buffer.stream.valid){ + buffer.mask := 0 + aggregationCounter := 0 + timer := 0 + } + + buffer.stream.ready := 
(bus.cmd.ready && doFlush || canAggregate) && !halt + bus.cmd.valid := buffer.stream.valid && doFlush && !halt + bus.cmd.last := True + bus.cmd.opcode := (buffer.write ? B(Bmb.Cmd.Opcode.WRITE) | B(Bmb.Cmd.Opcode.READ)) + bus.cmd.address := buffer.address + bus.cmd.length := buffer.length + bus.cmd.data := buffer.data + bus.cmd.mask := buffer.mask + + if (p.withExclusive) bus.cmd.exclusive := buffer.exclusive + bus.cmd.context.removeAssignments() := B(busCmdContext) + if (!p.withWriteResponse) busCmdContext.isWrite := bus.cmd.isWrite + busCmdContext.rspCount := aggregationCounter + + val aggregationSel = cmd.address(aggregationRange) + when(cmd.fire){ + val dIn = cmd.data.subdivideIn(8 bits) + val dReg = buffer.data.subdivideIn(8 bits) + for(byteId <- 0 until p.memDataBytes){ + when(aggregationSel === byteId / p.cpuDataBytes && cmd.mask(byteId % p.cpuDataBytes)){ + dReg.write(byteId, dIn(byteId % p.cpuDataBytes)) + buffer.mask(byteId) := True + } + } + } + + when(cmd.fire){ + buffer.write := cmd.wr + buffer.address := cmd.address.resized + buffer.length := (cmd.length << 2) | 3 + if (p.withExclusive) buffer.exclusive := cmd.exclusive + + when(cmd.wr && !cmd.uncached && !cmd.exclusive){ + aggregationEnabled := True + buffer.address(aggregationRange.high downto 0) := 0 + buffer.length := p.memDataBytes-1 + } otherwise { + aggregationEnabled := False + } + } + + + val rspCtx = bus.rsp.context.as(Context()) + rsp.aggregated := rspCtx.rspCount + + val syncLogic = p.withInvalidate generate new Area{ + val cmdCtx = Stream(UInt(log2Up(aggregationMax) bits)) + cmdCtx.valid := bus.cmd.fire && bus.cmd.isWrite + cmdCtx.payload := aggregationCounter + halt setWhen(!cmdCtx.ready) + + val syncCtx = cmdCtx.queueLowLatency(syncPendingMax, latency = 1) + syncCtx.ready := bus.sync.fire + + sync.arbitrationFrom(bus.sync) + sync.aggregated := syncCtx.payload + } + } + + + rsp.valid := bus.rsp.valid + if(!p.withWriteResponse) rsp.valid clearWhen(bus.rsp.context(0)) rsp.data := 
bus.rsp.data rsp.error := bus.rsp.isError + rsp.last := bus.rsp.last + if(p.withExclusive) rsp.exclusive := bus.rsp.exclusive bus.rsp.ready := True - bus - } + val invalidateLogic = p.withInvalidate generate new Area{ + val beatCountMinusOne = bus.inv.transferBeatCountMinusOne(p.bytePerLine) + val counter = Reg(UInt(widthOf(beatCountMinusOne) bits)) init(0) + + inv.valid := bus.inv.valid + inv.address := bus.inv.address + (counter << log2Up(p.bytePerLine)) + inv.enable := bus.inv.all + inv.last := counter === beatCountMinusOne + bus.inv.ready := inv.last && inv.ready + + if(widthOf(counter) != 0) when(inv.fire){ + counter := counter + 1 + when(inv.last){ + counter := 0 + } + } + + bus.ack.arbitrationFrom(ack.throwWhen(!ack.last)) + } + }.bus } +object DataCacheExternalAmoStates extends SpinalEnum{ + val LR_CMD, LR_RSP, SC_CMD, SC_RSP = newElement(); +} -class DataCache(p : DataCacheConfig) extends Component{ +//If external amo, mem rsp should stay +class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParameter) extends Component{ import p._ - assert(cpuDataWidth == memDataWidth) val io = new Bundle{ - val cpu = slave(DataCacheCpuBus(p)) + val cpu = slave(DataCacheCpuBus(p, mmuParameter)) val mem = master(DataCacheMemBus(p)) - // val flushDone = out Bool //It pulse at the same time than the manager.request.fire } val haltCpu = False val lineWidth = bytePerLine*8 val lineCount = cacheSize/bytePerLine - val wordWidth = Math.max(memDataWidth,cpuDataWidth) + val wordWidth = cpuDataWidth val wordWidthLog2 = log2Up(wordWidth) val wordPerLine = lineWidth/wordWidth val bytePerWord = wordWidth/8 val wayLineCount = lineCount/wayCount val wayLineLog2 = log2Up(wayLineCount) val wayWordCount = wayLineCount * wordPerLine + val memWordPerLine = lineWidth/memDataWidth val memTransactionPerLine = p.bytePerLine / (p.memDataWidth/8) + val bytePerMemWord = memDataWidth/8 + val wayMemWordCount = wayLineCount * memWordPerLine val tagRange = addressWidth-1 downto 
log2Up(wayLineCount*bytePerLine) val lineRange = tagRange.low-1 downto log2Up(bytePerLine) - val wordRange = log2Up(bytePerLine)-1 downto log2Up(bytePerWord) + val cpuWordRange = log2Up(bytePerLine)-1 downto log2Up(bytePerWord) + val memWordRange = log2Up(bytePerLine)-1 downto log2Up(bytePerMemWord) + val hitRange = tagRange.high downto lineRange.low + val memWordToCpuWordRange = log2Up(bytePerMemWord)-1 downto log2Up(bytePerWord) class LineInfo() extends Bundle{ @@ -374,6 +566,7 @@ class DataCache(p : DataCacheConfig) extends Component{ } val tagsReadCmd = Flow(UInt(log2Up(wayLineCount) bits)) + val tagsInvReadCmd = withInvalidate generate Flow(UInt(log2Up(wayLineCount) bits)) val tagsWriteCmd = Flow(new Bundle{ val way = Bits(wayCount bits) val address = UInt(log2Up(wayLineCount) bits) @@ -382,23 +575,26 @@ class DataCache(p : DataCacheConfig) extends Component{ val tagsWriteLastCmd = RegNext(tagsWriteCmd) - val dataReadCmd = Flow(UInt(log2Up(wayWordCount) bits)) + val dataReadCmd = Flow(UInt(log2Up(wayMemWordCount) bits)) val dataWriteCmd = Flow(new Bundle{ val way = Bits(wayCount bits) - val address = UInt(log2Up(wayWordCount) bits) - val data = Bits(wordWidth bits) - val mask = Bits(wordWidth/8 bits) + val address = UInt(log2Up(wayMemWordCount) bits) + val data = Bits(memDataWidth bits) + val mask = Bits(memDataWidth/8 bits) }) - val ways = for(i <- 0 until wayCount) yield new Area{ val tags = Mem(new LineInfo(), wayLineCount) - val data = Mem(Bits(wordWidth bit), wayWordCount) + val data = Mem(Bits(memDataWidth bit), wayMemWordCount) //Reads val tagsReadRsp = tags.readSync(tagsReadCmd.payload, tagsReadCmd.valid && !io.cpu.memory.isStuck) - val dataReadRsp = data.readSync(dataReadCmd.payload, dataReadCmd.valid && !io.cpu.memory.isStuck) + val dataReadRspMem = data.readSync(dataReadCmd.payload, dataReadCmd.valid && !io.cpu.memory.isStuck) + val dataReadRspSel = if(mergeExecuteMemory) io.cpu.writeBack.address else io.cpu.memory.address + val dataReadRsp = 
dataReadRspMem.subdivideIn(cpuDataWidth bits).read(dataReadRspSel(memWordToCpuWordRange)) + + val tagsInvReadRsp = withInvalidate generate tags.readSync(tagsInvReadCmd.payload, tagsInvReadCmd.valid) //Writes when(tagsWriteCmd.valid && tagsWriteCmd.way(i)){ @@ -427,43 +623,150 @@ class DataCache(p : DataCacheConfig) extends Component{ tagsReadCmd.valid := True dataReadCmd.valid := True tagsReadCmd.payload := io.cpu.execute.address(lineRange) - dataReadCmd.payload := io.cpu.execute.address(lineRange.high downto wordRange.low) + dataReadCmd.payload := io.cpu.execute.address(lineRange.high downto memWordRange.low) } def collisionProcess(readAddress : UInt, readMask : Bits): Bits ={ val ret = Bits(wayCount bits) + val readAddressAligned = (readAddress >> log2Up(memDataWidth/cpuDataWidth)) + val dataWriteMaskAligned = dataWriteCmd.mask.subdivideIn(memDataWidth/cpuDataWidth slices).read(readAddress(log2Up(memDataWidth/cpuDataWidth)-1 downto 0)) for(i <- 0 until wayCount){ - ret(i) := dataWriteCmd.valid && dataWriteCmd.way(i) && dataWriteCmd.address === readAddress && (readMask & dataWriteCmd.mask) =/= 0 + ret(i) := dataWriteCmd.valid && dataWriteCmd.way(i) && dataWriteCmd.address === readAddressAligned && (readMask & dataWriteMaskAligned) =/= 0 } ret } + + io.cpu.execute.haltIt := False + + val rspSync = True + val rspLast = True + val memCmdSent = RegInit(False) setWhen (io.mem.cmd.ready) clearWhen (!io.cpu.writeBack.isStuck) + val pending = withExclusive generate new Area{ + val counter = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0) + val counterNext = counter + U(io.mem.cmd.fire && io.mem.cmd.last) - ((io.mem.rsp.valid && io.mem.rsp.last) ? 
(io.mem.rsp.aggregated +^ 1) | 0) + counter := counterNext + + val done = RegNext(counterNext === 0) + val full = RegNext(counter.msb) //Has margin + val last = RegNext(counterNext === 1) //Equivalent to counter === 1 but pipelined + + if(!withInvalidate) { + io.cpu.execute.haltIt setWhen(full) + } + + rspSync clearWhen (!last || !memCmdSent) + rspLast clearWhen (!last) + } + + val sync = withInvalidate generate new Area{ + io.mem.sync.ready := True + val syncCount = io.mem.sync.aggregated +^ 1 + val syncContext = new Area{ + val history = Mem(Bool, pendingMax) + val wPtr, rPtr = Reg(UInt(log2Up(pendingMax)+1 bits)) init(0) + when(io.mem.cmd.fire && io.mem.cmd.wr){ + history.write(wPtr.resized, io.mem.cmd.uncached) + wPtr := wPtr + 1 + } + + when(io.mem.sync.fire){ + rPtr := rPtr + syncCount + } + val uncached = history.readAsync(rPtr.resized) + val full = RegNext(wPtr - rPtr >= pendingMax-1) + io.cpu.execute.haltIt setWhen(full) + } + + def pending(inc : Bool, dec : Bool) = new Area { + val pendingSync = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0) + val pendingSyncNext = pendingSync + U(io.mem.cmd.fire && io.mem.cmd.wr && inc) - ((io.mem.sync.fire && dec) ? syncCount | 0) + pendingSync := pendingSyncNext + } + + val writeCached = pending(inc = !io.mem.cmd.uncached, dec = !syncContext.uncached) + val writeUncached = pending(inc = io.mem.cmd.uncached, dec = syncContext.uncached) + + def track(load : Bool, uncached : Boolean) = new Area { + val counter = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0) + counter := counter - ((io.mem.sync.fire && counter =/= 0 && (if(uncached) syncContext.uncached else !syncContext.uncached)) ? 
syncCount | 0) + when(load){ counter := (if(uncached) writeUncached.pendingSyncNext else writeCached.pendingSyncNext) } + + val busy = counter =/= 0 + } + + val w2w = track(load = io.cpu.writeBack.fence.PW && io.cpu.writeBack.fence.SW, uncached = false) + val w2r = track(load = io.cpu.writeBack.fence.PW && io.cpu.writeBack.fence.SR, uncached = false) + val w2i = track(load = io.cpu.writeBack.fence.PW && io.cpu.writeBack.fence.SI, uncached = false) + val w2o = track(load = io.cpu.writeBack.fence.PW && io.cpu.writeBack.fence.SO, uncached = false) + val o2w = track(load = io.cpu.writeBack.fence.PO && io.cpu.writeBack.fence.SW, uncached = true) + val o2r = track(load = io.cpu.writeBack.fence.PO && io.cpu.writeBack.fence.SR, uncached = true) + //Assume o2i and o2o are ordered by the interconnect + + val notTotalyConsistent = w2w.busy || w2r.busy || w2i.busy || w2o.busy || o2w.busy || o2r.busy + } + + + + val stage0 = new Area{ val mask = io.cpu.execute.size.mux ( U(0) -> B"0001", U(1) -> B"0011", default -> B"1111" ) |<< io.cpu.execute.address(1 downto 0) - val colisions = collisionProcess(io.cpu.execute.address(lineRange.high downto wordRange.low), mask) + val dataColisions = collisionProcess(io.cpu.execute.address(lineRange.high downto cpuWordRange.low), mask) + val wayInvalidate = B(0, wayCount bits) //Used if invalidate enabled + + val isAmo = if(withAmo) io.cpu.execute.isAmo else False } val stageA = new Area{ def stagePipe[T <: Data](that : T) = if(mergeExecuteMemory) CombInit(that) else RegNextWhen(that, !io.cpu.memory.isStuck) val request = stagePipe(io.cpu.execute.args) val mask = stagePipe(stage0.mask) - io.cpu.memory.mmuBus.cmd.isValid := io.cpu.memory.isValid - io.cpu.memory.mmuBus.cmd.virtualAddress := io.cpu.memory.address - io.cpu.memory.mmuBus.cmd.bypassTranslation := False - io.cpu.memory.mmuBus.end := !io.cpu.memory.isStuck || io.cpu.memory.isRemoved io.cpu.memory.isWrite := request.wr - val wayHits = earlyWaysHits generate ways.map(way => 
(io.cpu.memory.mmuBus.rsp.physicalAddress(tagRange) === way.tagsReadRsp.address && way.tagsReadRsp.valid)) + val isAmo = if(withAmo) request.isAmo else False + val isLrsc = if(withAmo) request.isLrsc else False + val consistancyCheck = (withInvalidate || withWriteResponse) generate new Area { + val hazard = False + val w = sync.w2w.busy || sync.o2w.busy + val r = stagePipe(sync.w2r.busy || sync.o2r.busy) || sync.w2r.busy || sync.o2r.busy // As it use the cache, need to check against the execute stage status too + val o = CombInit(sync.w2o.busy) + val i = CombInit(sync.w2i.busy) + + val s = io.cpu.memory.mmuRsp.isIoAccess ? o | w + val l = io.cpu.memory.mmuRsp.isIoAccess ? i | r + + when(isAmo? (s || l) | (request.wr ? s | l)){ + hazard := True + } + when(request.totalyConsistent && (sync.notTotalyConsistent || io.cpu.writeBack.isValid && io.cpu.writeBack.isWrite)){ + hazard := True + } + } + + val wayHits = earlyWaysHits generate Bits(wayCount bits) + val indirectTlbHitGen = (earlyWaysHits && !directTlbHit) generate new Area { + wayHits := B(ways.map(way => (io.cpu.memory.mmuRsp.physicalAddress(tagRange) === way.tagsReadRsp.address && way.tagsReadRsp.valid))) + } + val directTlbHitGen = (earlyWaysHits && directTlbHit) generate new Area { + val wayTlbHits = for (way <- ways) yield for (tlb <- io.cpu.memory.mmuRsp.ways) yield { + way.tagsReadRsp.address === tlb.physical(tagRange) && tlb.sel + } + val translatedHits = B(wayTlbHits.map(_.orR)) + val bypassHits = B(ways.map(_.tagsReadRsp.address === io.cpu.memory.address(tagRange))) + wayHits := (io.cpu.memory.mmuRsp.bypassTranslation ? bypassHits | translatedHits) & B(ways.map(_.tagsReadRsp.valid)) + } + val dataMux = earlyDataMux generate MuxOH(wayHits, ways.map(_.dataReadRsp)) - val colisions = if(mergeExecuteMemory){ - stagePipe(stage0.colisions) + val wayInvalidate = stagePipe(stage0. 
wayInvalidate) + val dataColisions = if(mergeExecuteMemory){ + stagePipe(stage0.dataColisions) } else { //Assume the writeback stage will never be unstall memory acces while memory stage is stalled - stagePipe(stage0.colisions) | collisionProcess(io.cpu.memory.address(lineRange.high downto wordRange.low), mask) + stagePipe(stage0.dataColisions) | collisionProcess(io.cpu.memory.address(lineRange.high downto cpuWordRange.low), mask) } } @@ -472,35 +775,41 @@ class DataCache(p : DataCacheConfig) extends Component{ def ramPipe[T <: Data](that : T) = if(mergeExecuteMemory) CombInit(that) else RegNextWhen(that, !io.cpu.writeBack.isStuck) val request = RegNextWhen(stageA.request, !io.cpu.writeBack.isStuck) val mmuRspFreeze = False - val mmuRsp = RegNextWhen(io.cpu.memory.mmuBus.rsp, !io.cpu.writeBack.isStuck && !mmuRspFreeze) + val mmuRsp = RegNextWhen(io.cpu.memory.mmuRsp, !io.cpu.writeBack.isStuck && !mmuRspFreeze) val tagsReadRsp = ways.map(w => ramPipe(w.tagsReadRsp)) val dataReadRsp = !earlyDataMux generate ways.map(w => ramPipe(w.dataReadRsp)) - val waysHits = if(earlyWaysHits) stagePipe(B(stageA.wayHits)) else B(tagsReadRsp.map(tag => mmuRsp.physicalAddress(tagRange) === tag.address && tag.valid).asBits()) + val wayInvalidate = stagePipe(stageA. 
wayInvalidate) + val consistancyHazard = if(stageA.consistancyCheck != null) stagePipe(stageA.consistancyCheck.hazard) else False + val dataColisions = stagePipe(stageA.dataColisions) + val waysHitsBeforeInvalidate = if(earlyWaysHits) stagePipe(B(stageA.wayHits)) else B(tagsReadRsp.map(tag => mmuRsp.physicalAddress(tagRange) === tag.address && tag.valid).asBits()) + val waysHits = waysHitsBeforeInvalidate & ~wayInvalidate val waysHit = waysHits.orR val dataMux = if(earlyDataMux) stagePipe(stageA.dataMux) else MuxOH(waysHits, dataReadRsp) val mask = stagePipe(stageA.mask) - val colisions = stagePipe(stageA.colisions) //Loader interface val loaderValid = False - + val ioMemRspMuxed = io.mem.rsp.data.subdivideIn(cpuDataWidth bits).read(io.cpu.writeBack.address(memWordToCpuWordRange)) io.cpu.writeBack.haltIt := io.cpu.writeBack.isValid //Evict the cache after reset logics val flusher = new Area { val valid = RegInit(False) + val hold = False when(valid) { tagsWriteCmd.valid := valid tagsWriteCmd.address := mmuRsp.physicalAddress(lineRange) tagsWriteCmd.way.setAll() tagsWriteCmd.data.valid := False io.cpu.writeBack.haltIt := True - when(mmuRsp.physicalAddress(lineRange) =/= wayLineCount - 1) { - mmuRsp.physicalAddress.getDrivingReg(lineRange) := mmuRsp.physicalAddress(lineRange) + 1 - } otherwise { - valid := False + when(!hold) { + when(mmuRsp.physicalAddress(lineRange) =/= wayLineCount - 1) { + mmuRsp.physicalAddress.getDrivingReg(lineRange) := mmuRsp.physicalAddress(lineRange) + 1 + } otherwise { + valid := False + } } } @@ -515,23 +824,23 @@ class DataCache(p : DataCacheConfig) extends Component{ } } - - val lrsc = withLrSc generate new Area{ + val lrSc = withInternalLrSc generate new Area{ val reserved = RegInit(False) - when(io.cpu.writeBack.isValid && !io.cpu.writeBack.isStuck && !io.cpu.redo && request.isLrsc && !request.wr){ - reserved := True - } - when(io.cpu.writeBack.clearLrsc){ - reserved := False + when(io.cpu.writeBack.isValid && 
!io.cpu.writeBack.isStuck && request.isLrsc){ + reserved := !request.wr } } - val requestDataBypass = CombInit(request.data) val isAmo = if(withAmo) request.isAmo else False + val isAmoCached = if(withInternalAmo) isAmo else False + val isExternalLsrc = if(withExternalLrSc) request.isLrsc else False + val isExternalAmo = if(withExternalAmo) request.isAmo else False + + val requestDataBypass = CombInit(request.data) + import DataCacheExternalAmoStates._ val amo = withAmo generate new Area{ def rf = request.data - def mem = dataMux - + def mem = if(withInternalAmo) dataMux else ioMemRspMuxed val compare = request.amoCtrl.alu.msb val unsigned = request.amoCtrl.alu(2 downto 1) === B"11" val addSub = (rf.asSInt + Mux(compare, ~mem, mem).asSInt + Mux(compare, S(1), S(0))).asBits @@ -545,12 +854,30 @@ class DataCache(p : DataCacheConfig) extends Component{ B"011" -> (rf & mem), default -> (selectRf ? rf | mem) ) - val resultRegValid = RegNext(True) clearWhen(!io.cpu.writeBack.isStuck) - val resultReg = RegNext(result) + // val resultRegValid = RegNext(True) clearWhen(!io.cpu.writeBack.isStuck) + // val resultReg = RegNext(result) + val resultReg = Reg(Bits(32 bits)) + + val internal = withInternalAmo generate new Area{ + val resultRegValid = RegNext(io.cpu.writeBack.isStuck) + resultReg := result + } + val external = !withInternalAmo generate new Area{ + val state = RegInit(LR_CMD) + } } - val memCmdSent = RegInit(False) setWhen (io.mem.cmd.ready) clearWhen (!io.cpu.writeBack.isStuck) + val cpuWriteToCache = False + when(cpuWriteToCache){ + dataWriteCmd.valid setWhen(request.wr && waysHit) + dataWriteCmd.address := mmuRsp.physicalAddress(lineRange.high downto memWordRange.low) + dataWriteCmd.data.subdivideIn(cpuDataWidth bits).foreach(_ := requestDataBypass) + dataWriteCmd.mask := 0 + dataWriteCmd.mask.subdivideIn(cpuDataWidth/8 bits).write(io.cpu.writeBack.address(memWordToCpuWordRange), mask) + dataWriteCmd.way := waysHits + } + io.cpu.redo := False 
io.cpu.writeBack.accessError := False io.cpu.writeBack.mmuException := io.cpu.writeBack.isValid && (if(catchIllegal) mmuRsp.exception || (!mmuRsp.allowWrite && request.wr) || (!mmuRsp.allowRead && (!request.wr || isAmo)) else False) @@ -558,57 +885,89 @@ class DataCache(p : DataCacheConfig) extends Component{ io.cpu.writeBack.isWrite := request.wr io.mem.cmd.valid := False - io.mem.cmd.address.assignDontCare() - io.mem.cmd.length.assignDontCare() - io.mem.cmd.last.assignDontCare() + io.mem.cmd.address := mmuRsp.physicalAddress(tagRange.high downto cpuWordRange.low) @@ U(0, cpuWordRange.low bits) + io.mem.cmd.length := 0 + io.mem.cmd.last := True io.mem.cmd.wr := request.wr io.mem.cmd.mask := mask io.mem.cmd.data := requestDataBypass + io.mem.cmd.uncached := mmuRsp.isIoAccess + if(withExternalLrSc) io.mem.cmd.exclusive := request.isLrsc || isAmo + + val bypassCache = mmuRsp.isIoAccess || isExternalLsrc || isExternalAmo + + io.cpu.writeBack.keepMemRspData := False when(io.cpu.writeBack.isValid) { - when(mmuRsp.isIoAccess) { - io.cpu.writeBack.haltIt.clearWhen(request.wr ? io.mem.cmd.ready | io.mem.rsp.valid) + when(isExternalAmo){ + if(withExternalAmo) switch(amo.external.state){ + is(LR_CMD){ + io.mem.cmd.valid := True + io.mem.cmd.wr := False + when(io.mem.cmd.ready) { + amo.external.state := LR_RSP + } + } + is(LR_RSP){ + when(io.mem.rsp.valid && pending.last) { + amo.external.state := SC_CMD + amo.resultReg := amo.result + } + } + is(SC_CMD){ + io.mem.cmd.valid := True + when(io.mem.cmd.ready) { + amo.external.state := SC_RSP + } + } + is(SC_RSP){ + io.cpu.writeBack.keepMemRspData := True + when(io.mem.rsp.valid) { + amo.external.state := LR_CMD + when(io.mem.rsp.exclusive){ //Success + cpuWriteToCache := True + io.cpu.writeBack.haltIt := False + } + } + } + } + } elsewhen(mmuRsp.isIoAccess || isExternalLsrc) { + val waitResponse = !request.wr + if(withExternalLrSc) waitResponse setWhen(request.isLrsc) + + io.cpu.writeBack.haltIt.clearWhen(waitResponse ? 
(io.mem.rsp.valid && rspSync) | io.mem.cmd.ready) io.mem.cmd.valid := !memCmdSent - io.mem.cmd.address := mmuRsp.physicalAddress(tagRange.high downto wordRange.low) @@ U(0, wordRange.low bit) - io.mem.cmd.length := 0 - io.mem.cmd.last := True - if(withLrSc) when(request.isLrsc && !lrsc.reserved){ + if(withInternalLrSc) when(request.isLrsc && !lrSc.reserved){ io.mem.cmd.valid := False io.cpu.writeBack.haltIt := False } } otherwise { - when(waysHit || request.wr && !isAmo) { //Do not require a cache refill ? - //Data cache update - dataWriteCmd.valid setWhen(request.wr && waysHit) - dataWriteCmd.address := mmuRsp.physicalAddress(lineRange.high downto wordRange.low) - dataWriteCmd.data := requestDataBypass - dataWriteCmd.mask := mask - dataWriteCmd.way := waysHits + when(waysHit || request.wr && !isAmoCached) { //Do not require a cache refill ? + cpuWriteToCache := True //Write through io.mem.cmd.valid setWhen(request.wr) - io.mem.cmd.address := mmuRsp.physicalAddress(tagRange.high downto wordRange.low) @@ U(0, wordRange.low bit) + io.mem.cmd.address := mmuRsp.physicalAddress(tagRange.high downto cpuWordRange.low) @@ U(0, cpuWordRange.low bits) io.mem.cmd.length := 0 - io.mem.cmd.last := True io.cpu.writeBack.haltIt clearWhen(!request.wr || io.mem.cmd.ready) - if(withAmo) when(isAmo){ - when(!amo.resultRegValid) { + if(withInternalAmo) when(isAmo){ + when(!amo.internal.resultRegValid) { io.mem.cmd.valid := False dataWriteCmd.valid := False io.cpu.writeBack.haltIt := True } } - //On write to read colisions - when((!request.wr || isAmo) && (colisions & waysHits) =/= 0){ + //On write to read dataColisions + when((!request.wr || isAmoCached) && (dataColisions & waysHits) =/= 0){ io.cpu.redo := True if(withAmo) io.mem.cmd.valid := False } - if(withLrSc) when(request.isLrsc && !lrsc.reserved){ + if(withInternalLrSc) when(request.isLrsc && !lrSc.reserved){ io.mem.cmd.valid := False dataWriteCmd.valid := False io.cpu.writeBack.haltIt := False @@ -619,43 +978,45 @@ class 
DataCache(p : DataCacheConfig) extends Component{ io.mem.cmd.wr := False io.mem.cmd.address := mmuRsp.physicalAddress(tagRange.high downto lineRange.low) @@ U(0,lineRange.low bit) io.mem.cmd.length := p.burstLength-1 - io.mem.cmd.last := True loaderValid setWhen(io.mem.cmd.ready) } } } - when(mmuRsp.isIoAccess){ - io.cpu.writeBack.data := io.mem.rsp.data - if(catchAccessError) io.cpu.writeBack.accessError := io.mem.rsp.valid && io.mem.rsp.error + when(bypassCache){ + io.cpu.writeBack.data := ioMemRspMuxed + def isLast = if(pending != null) pending.last else True + if(catchAccessError) io.cpu.writeBack.accessError := !request.wr && isLast && io.mem.rsp.valid && io.mem.rsp.error } otherwise { - io.cpu.writeBack.data := dataMux + io.cpu.writeBack.data := dataMux if(catchAccessError) io.cpu.writeBack.accessError := (waysHits & B(tagsReadRsp.map(_.error))) =/= 0 } + if(withLrSc) when(request.isLrsc && request.wr){ + val success = if(withInternalLrSc)lrSc.reserved else io.mem.rsp.exclusive + io.cpu.writeBack.data := B(!success).resized + if(withExternalLrSc) when(io.cpu.writeBack.isValid && io.mem.rsp.valid && rspSync && success && waysHit){ + cpuWriteToCache := True + } + } + if(withAmo) when(request.isAmo){ + requestDataBypass := amo.resultReg + } + //remove side effects on exceptions - when(mmuRsp.refilling || io.cpu.writeBack.accessError || io.cpu.writeBack.mmuException || io.cpu.writeBack.unalignedAccess){ + when(consistancyHazard || mmuRsp.refilling || io.cpu.writeBack.accessError || io.cpu.writeBack.mmuException || io.cpu.writeBack.unalignedAccess){ io.mem.cmd.valid := False tagsWriteCmd.valid := False dataWriteCmd.valid := False loaderValid := False io.cpu.writeBack.haltIt := False + if(withInternalLrSc) lrSc.reserved := lrSc.reserved + if(withExternalAmo) amo.external.state := LR_CMD } - io.cpu.redo setWhen(io.cpu.writeBack.isValid && mmuRsp.refilling) + io.cpu.redo setWhen(io.cpu.writeBack.isValid && (mmuRsp.refilling || consistancyHazard)) 
assert(!(io.cpu.writeBack.isValid && !io.cpu.writeBack.haltIt && io.cpu.writeBack.isStuck), "writeBack stuck by another plugin is not allowed") - - if(withLrSc){ - when(request.isLrsc && request.wr){ - io.cpu.writeBack.data := (!lrsc.reserved).asBits.resized - } - } - if(withAmo){ - when(request.isAmo){ - requestDataBypass := amo.resultReg - } - } } val loader = new Area{ @@ -665,8 +1026,10 @@ class DataCache(p : DataCacheConfig) extends Component{ val counter = Counter(memTransactionPerLine) val waysAllocator = Reg(Bits(wayCount bits)) init(1) val error = RegInit(False) + val kill = False + val killReg = RegInit(False) setWhen(kill) - when(valid && io.mem.rsp.valid){ + when(valid && io.mem.rsp.valid && rspLast){ dataWriteCmd.valid := True dataWriteCmd.address := baseAddress(lineRange) @@ counter dataWriteCmd.data := io.mem.rsp.data @@ -676,19 +1039,22 @@ class DataCache(p : DataCacheConfig) extends Component{ counter.increment() } + val done = CombInit(counter.willOverflow) + if(withInvalidate) done setWhen(valid && pending.counter === 0) //Used to solve invalidate write request at the same time - when(counter.willOverflow){ + when(done){ valid := False //Update tags tagsWriteCmd.valid := True tagsWriteCmd.address := baseAddress(lineRange) - tagsWriteCmd.data.valid := True + tagsWriteCmd.data.valid := !(kill || killReg) tagsWriteCmd.data.address := baseAddress(tagRange) - tagsWriteCmd.data.error := error || io.mem.rsp.error + tagsWriteCmd.data.error := error || (io.mem.rsp.valid && io.mem.rsp.error) tagsWriteCmd.way := waysAllocator error := False + killReg := False } when(!valid){ @@ -698,4 +1064,61 @@ class DataCache(p : DataCacheConfig) extends Component{ io.cpu.redo setWhen(valid) stageB.mmuRspFreeze setWhen(stageB.loaderValid || valid) } + + val invalidate = withInvalidate generate new Area{ + val s0 = new Area{ + val input = io.mem.inv + tagsInvReadCmd.valid := input.fire + tagsInvReadCmd.payload := input.address(lineRange) + + val loaderTagHit = 
input.address(tagRange) === loader.baseAddress(tagRange) + val loaderLineHit = input.address(lineRange) === loader.baseAddress(lineRange) + when(input.valid && input.enable && loader.valid && loaderLineHit && loaderTagHit){ + loader.kill := True + } + } + val s1 = new Area{ + val input = s0.input.stage() + val loaderValid = RegNextWhen(loader.valid, s0.input.ready) + val loaderWay = RegNextWhen(loader.waysAllocator, s0.input.ready) + val loaderTagHit = RegNextWhen(s0.loaderTagHit, s0.input.ready) + val loaderLineHit = RegNextWhen(s0.loaderLineHit, s0.input.ready) + val invalidations = Bits(wayCount bits) + + var wayHits = B(ways.map(way => (input.address(tagRange) === way.tagsInvReadRsp.address && way.tagsInvReadRsp.valid))) & ~invalidations + + //Handle invalider read during loader write hazard + when(loaderValid && loaderLineHit && !loaderTagHit){ + wayHits \= wayHits & ~loaderWay + } + } + val s2 = new Area{ + val input = s1.input.stage() + val wayHits = RegNextWhen(s1.wayHits, s1.input.ready) + val wayHit = wayHits.orR + + when(input.valid && input.enable) { + //Manage invalidate write during cpu read hazard + when(input.address(lineRange) === io.cpu.execute.address(lineRange)) { + stage0.wayInvalidate := wayHits + } + + //Invalidate cache tag + when(wayHit) { + tagsWriteCmd.valid := True + stageB.flusher.hold := True + tagsWriteCmd.address := input.address(lineRange) + tagsWriteCmd.data.valid := False + tagsWriteCmd.way := wayHits + loader.done := False //Hold loader tags write + } + } + io.mem.ack.arbitrationFrom(input) + io.mem.ack.hit := wayHit + io.mem.ack.last := input.last + + //Manage invalidation read during write hazard + s1.invalidations := RegNextWhen((input.valid && input.enable && input.address(lineRange) === s0.input.address(lineRange)) ? 
wayHits | 0, s0.input.ready) + } + } } \ No newline at end of file diff --git a/src/main/scala/vexriscv/ip/InstructionCache.scala b/src/main/scala/vexriscv/ip/InstructionCache.scala index 4df0f79..dc97444 100644 --- a/src/main/scala/vexriscv/ip/InstructionCache.scala +++ b/src/main/scala/vexriscv/ip/InstructionCache.scala @@ -22,8 +22,10 @@ case class InstructionCacheConfig( cacheSize : Int, asyncTagMemory : Boolean, twoCycleCache : Boolean = true, twoCycleRam : Boolean = false, + twoCycleRamInnerMux : Boolean = false, preResetFlush : Boolean = false, - bypassGen : Boolean = false ){ + bypassGen : Boolean = false, + reducedBankWidth : Boolean = false){ assert(!(twoCycleRam && !twoCycleCache)) @@ -70,7 +72,7 @@ case class InstructionCacheConfig( cacheSize : Int, def getBmbParameter() = BmbParameter( addressWidth = 32, - dataWidth = 32, + dataWidth = memDataWidth, lengthWidth = log2Up(this.bytePerLine), sourceWidth = 0, contextWidth = 0, @@ -103,7 +105,7 @@ trait InstructionCacheCommons{ val cacheMiss, error, mmuRefilling, mmuException, isUser : Bool } -case class InstructionCacheCpuFetch(p : InstructionCacheConfig) extends Bundle with IMasterSlave with InstructionCacheCommons { +case class InstructionCacheCpuFetch(p : InstructionCacheConfig, mmuParameter : MemoryTranslatorBusParameter) extends Bundle with IMasterSlave with InstructionCacheCommons { val isValid = Bool() val isStuck = Bool() val isRemoved = Bool() @@ -111,16 +113,15 @@ case class InstructionCacheCpuFetch(p : InstructionCacheConfig) extends Bundle w val data = Bits(p.cpuDataWidth bits) val dataBypassValid = p.bypassGen generate Bool() val dataBypass = p.bypassGen generate Bits(p.cpuDataWidth bits) - val mmuBus = MemoryTranslatorBus() + val mmuRsp = MemoryTranslatorRsp(mmuParameter) val physicalAddress = UInt(p.addressWidth bits) val cacheMiss, error, mmuRefilling, mmuException, isUser = ifGen(!p.twoCycleCache)(Bool) - val haltIt = Bool() //Used to wait on the MMU rsp busy override def asMaster(): Unit 
= { out(isValid, isStuck, isRemoved, pc) - inWithNull(error,mmuRefilling,mmuException,data, cacheMiss,physicalAddress, haltIt) + inWithNull(error,mmuRefilling,mmuException,data, cacheMiss,physicalAddress) outWithNull(isUser, dataBypass, dataBypassValid) - slaveWithNull(mmuBus) + out(mmuRsp) } } @@ -140,9 +141,9 @@ case class InstructionCacheCpuDecode(p : InstructionCacheConfig) extends Bundle } } -case class InstructionCacheCpuBus(p : InstructionCacheConfig) extends Bundle with IMasterSlave{ +case class InstructionCacheCpuBus(p : InstructionCacheConfig, mmuParameter : MemoryTranslatorBusParameter) extends Bundle with IMasterSlave{ val prefetch = InstructionCacheCpuPrefetch(p) - val fetch = InstructionCacheCpuFetch(p) + val fetch = InstructionCacheCpuFetch(p, mmuParameter) val decode = InstructionCacheCpuDecode(p) val fill = Flow(UInt(p.addressWidth bits)) @@ -251,7 +252,7 @@ case class InstructionCacheMemBus(p : InstructionCacheConfig) extends Bundle wit def toBmb() : Bmb = { val busParameter = p.getBmbParameter - val bus = Bmb(busParameter) + val bus = Bmb(busParameter).setCompositeName(this,"toBmb", true) bus.cmd.arbitrationFrom(cmd) bus.cmd.opcode := Bmb.Cmd.Opcode.READ bus.cmd.address := cmd.address.resized @@ -276,34 +277,23 @@ case class InstructionCacheFlushBus() extends Bundle with IMasterSlave{ } } -class InstructionCache(p : InstructionCacheConfig) extends Component{ +class InstructionCache(p : InstructionCacheConfig, mmuParameter : MemoryTranslatorBusParameter) extends Component{ import p._ - assert(cpuDataWidth == memDataWidth, "Need testing") val io = new Bundle{ val flush = in Bool() - val cpu = slave(InstructionCacheCpuBus(p)) + val cpu = slave(InstructionCacheCpuBus(p, mmuParameter)) val mem = master(InstructionCacheMemBus(p)) } val lineWidth = bytePerLine*8 val lineCount = cacheSize/bytePerLine - val wordWidth = Math.max(memDataWidth,32) - val wordWidthLog2 = log2Up(wordWidth) - val wordPerLine = lineWidth/wordWidth + val cpuWordWidth = 
cpuDataWidth val memWordPerLine = lineWidth/memDataWidth - val bytePerWord = wordWidth/8 - val bytePerMemWord = memDataWidth/8 + val bytePerCpuWord = cpuWordWidth/8 val wayLineCount = lineCount/wayCount - val wayLineLog2 = log2Up(wayLineCount) - val wayWordCount = wayLineCount * wordPerLine val tagRange = addressWidth-1 downto log2Up(wayLineCount*bytePerLine) val lineRange = tagRange.low-1 downto log2Up(bytePerLine) - val wordRange = log2Up(bytePerLine)-1 downto log2Up(bytePerWord) - val memWordRange = log2Up(bytePerLine)-1 downto log2Up(bytePerMemWord) - val memWordToCpuWordRange = log2Up(bytePerMemWord)-1 downto log2Up(bytePerWord) - val tagLineRange = tagRange.high downto lineRange.low - val lineWordRange = lineRange.high downto wordRange.low case class LineTag() extends Bundle{ val valid = Bool @@ -311,17 +301,23 @@ class InstructionCache(p : InstructionCacheConfig) extends Component{ val address = UInt(tagRange.length bit) } + val bankCount = wayCount + val bankWidth = if(!reducedBankWidth) memDataWidth else Math.max(cpuDataWidth, memDataWidth/wayCount) + val bankByteSize = cacheSize/bankCount + val bankWordCount = bankByteSize*8/bankWidth + val bankWordToCpuWordRange = log2Up(bankWidth/8)-1 downto log2Up(bytePerCpuWord) + val memToBankRatio = bankWidth*bankCount / memDataWidth + + val banks = Seq.fill(bankCount)(Mem(Bits(bankWidth bits), bankWordCount)) val ways = Seq.fill(wayCount)(new Area{ val tags = Mem(LineTag(),wayLineCount) - val datas = Mem(Bits(memDataWidth bits),wayWordCount) if(preResetFlush){ tags.initBigInt(List.fill(wayLineCount)(BigInt(0))) } }) - io.cpu.fetch.haltIt := io.cpu.fetch.mmuBus.busy val lineLoader = new Area{ val fire = False @@ -369,7 +365,7 @@ class InstructionCache(p : InstructionCacheConfig) extends Component{ val write = new Area{ val tag = ways.map(_.tags.writePort) - val data = ways.map(_.datas.writePort) + val data = banks.map(_.writePort) } for(wayId <- 0 until wayCount){ @@ -380,13 +376,24 @@ class InstructionCache(p : 
InstructionCacheConfig) extends Component{ tag.data.valid := flushCounter.msb tag.data.error := hadError || io.mem.rsp.error tag.data.address := address(tagRange) - - val data = write.data(wayId) - data.valid := io.mem.rsp.valid && wayHit - data.address := address(lineRange) @@ wordIndex - data.data := io.mem.rsp.data } + for((writeBank, bankId) <- write.data.zipWithIndex){ + if(!reducedBankWidth) { + writeBank.valid := io.mem.rsp.valid && wayToAllocate === bankId + writeBank.address := address(lineRange) @@ wordIndex + writeBank.data := io.mem.rsp.data + } else { + val sel = U(bankId) - wayToAllocate.value + val groupSel = wayToAllocate(log2Up(bankCount)-1 downto log2Up(bankCount/memToBankRatio)) + val subSel = sel(log2Up(bankCount/memToBankRatio) -1 downto 0) + writeBank.valid := io.mem.rsp.valid && groupSel === (bankId >> log2Up(bankCount/memToBankRatio)) + writeBank.address := address(lineRange) @@ wordIndex @@ (subSel) + writeBank.data := io.mem.rsp.data.subdivideIn(bankCount/memToBankRatio slices)(subSel) + } + } + + when(io.mem.rsp.valid) { wordIndex := (wordIndex + 1).resized hadError.setWhen(io.mem.rsp.error) @@ -396,27 +403,32 @@ class InstructionCache(p : InstructionCacheConfig) extends Component{ } } - val fetchStage = new Area{ val read = new Area{ - val waysValues = for(way <- ways) yield new Area{ + val banksValue = for(bank <- banks) yield new Area{ + val dataMem = bank.readSync(io.cpu.prefetch.pc(lineRange.high downto log2Up(bankWidth/8)), !io.cpu.fetch.isStuck) + val data = if(!twoCycleRamInnerMux) dataMem.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(bankWordToCpuWordRange)) else dataMem + } + + val waysValues = for((way, wayId) <- ways.zipWithIndex) yield new Area{ val tag = if(asyncTagMemory) { way.tags.readAsync(io.cpu.fetch.pc(lineRange)) }else { way.tags.readSync(io.cpu.prefetch.pc(lineRange), !io.cpu.fetch.isStuck) } - val data = way.datas.readSync(io.cpu.prefetch.pc(lineRange.high downto memWordRange.low), !io.cpu.fetch.isStuck) +// 
val data = CombInit(banksValue(wayId).data) } } val hit = (!twoCycleRam) generate new Area{ - val hits = read.waysValues.map(way => way.tag.valid && way.tag.address === io.cpu.fetch.mmuBus.rsp.physicalAddress(tagRange)) + val hits = read.waysValues.map(way => way.tag.valid && way.tag.address === io.cpu.fetch.mmuRsp.physicalAddress(tagRange)) val valid = Cat(hits).orR - val id = OHToUInt(hits) - val error = read.waysValues.map(_.tag.error).read(id) - val data = read.waysValues.map(_.data).read(id) - val word = if(cpuDataWidth == memDataWidth) CombInit(data) else data.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(memWordToCpuWordRange)) + val wayId = OHToUInt(hits) + val bankId = if(!reducedBankWidth) wayId else (wayId >> log2Up(bankCount/memToBankRatio)) @@ ((wayId + (io.cpu.fetch.mmuRsp.physicalAddress(log2Up(bankWidth/8), log2Up(bankCount) bits))).resize(log2Up(bankCount/memToBankRatio))) + val error = read.waysValues.map(_.tag.error).read(wayId) + val data = read.banksValue.map(_.data).read(bankId) + val word = if(cpuDataWidth == memDataWidth || !twoCycleRamInnerMux) CombInit(data) else data.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(bankWordToCpuWordRange)) io.cpu.fetch.data := (if(p.bypassGen) (io.cpu.fetch.dataBypassValid ? 
io.cpu.fetch.dataBypass | word) else word) if(twoCycleCache){ io.cpu.decode.data := RegNextWhen(io.cpu.fetch.data,!io.cpu.decode.isStuck) @@ -424,18 +436,14 @@ class InstructionCache(p : InstructionCacheConfig) extends Component{ } if(twoCycleRam && wayCount == 1){ - val cacheData = if(cpuDataWidth == memDataWidth) CombInit(read.waysValues.head.data) else read.waysValues.head.data.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(memWordToCpuWordRange)) + val cacheData = if(cpuDataWidth == memDataWidth || !twoCycleRamInnerMux) CombInit(read.banksValue.head.data) else read.banksValue.head.data.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(bankWordToCpuWordRange)) io.cpu.fetch.data := (if(p.bypassGen) (io.cpu.fetch.dataBypassValid ? io.cpu.fetch.dataBypass | cacheData) else cacheData) } - io.cpu.fetch.mmuBus.cmd.isValid := io.cpu.fetch.isValid - io.cpu.fetch.mmuBus.cmd.virtualAddress := io.cpu.fetch.pc - io.cpu.fetch.mmuBus.cmd.bypassTranslation := False - io.cpu.fetch.mmuBus.end := !io.cpu.fetch.isStuck || io.cpu.fetch.isRemoved - io.cpu.fetch.physicalAddress := io.cpu.fetch.mmuBus.rsp.physicalAddress + io.cpu.fetch.physicalAddress := io.cpu.fetch.mmuRsp.physicalAddress val resolution = ifGen(!twoCycleCache)( new Area{ - val mmuRsp = io.cpu.fetch.mmuBus.rsp + val mmuRsp = io.cpu.fetch.mmuRsp io.cpu.fetch.cacheMiss := !hit.valid io.cpu.fetch.error := hit.error @@ -448,7 +456,7 @@ class InstructionCache(p : InstructionCacheConfig) extends Component{ val decodeStage = ifGen(twoCycleCache) (new Area{ def stage[T <: Data](that : T) = RegNextWhen(that,!io.cpu.decode.isStuck) - val mmuRsp = stage(io.cpu.fetch.mmuBus.rsp) + val mmuRsp = stage(io.cpu.fetch.mmuRsp) val hit = if(!twoCycleRam) new Area{ val valid = stage(fetchStage.hit.valid) @@ -457,10 +465,11 @@ class InstructionCache(p : InstructionCacheConfig) extends Component{ val tags = fetchStage.read.waysValues.map(way => stage(way.tag)) val hits = tags.map(tag => tag.valid && tag.address === 
mmuRsp.physicalAddress(tagRange)) val valid = Cat(hits).orR - val id = OHToUInt(hits) - val error = tags(id).error - val data = fetchStage.read.waysValues.map(way => stage(way.data)).read(id) - val word = if(cpuDataWidth == memDataWidth) data else data.subdivideIn(cpuDataWidth bits).read(io.cpu.decode.pc(memWordToCpuWordRange)) + val wayId = OHToUInt(hits) + val bankId = if(!reducedBankWidth) wayId else (wayId >> log2Up(bankCount/memToBankRatio)) @@ ((wayId + (mmuRsp.physicalAddress(log2Up(bankWidth/8), log2Up(bankCount) bits))).resize(log2Up(bankCount/memToBankRatio))) + val error = tags(wayId).error + val data = fetchStage.read.banksValue.map(bank => stage(bank.data)).read(bankId) + val word = if(cpuDataWidth == memDataWidth || !twoCycleRamInnerMux) data else data.subdivideIn(cpuDataWidth bits).read(io.cpu.decode.pc(bankWordToCpuWordRange)) if(p.bypassGen) when(stage(io.cpu.fetch.dataBypassValid)){ word := stage(io.cpu.fetch.dataBypass) } diff --git a/src/main/scala/vexriscv/plugin/CfuPlugin.scala b/src/main/scala/vexriscv/plugin/CfuPlugin.scala index d5aaf1c..de0ae91 100644 --- a/src/main/scala/vexriscv/plugin/CfuPlugin.scala +++ b/src/main/scala/vexriscv/plugin/CfuPlugin.scala @@ -5,6 +5,7 @@ import spinal.core._ import spinal.lib._ import spinal.lib.bus.bmb.WeakConnector import spinal.lib.bus.misc.{AddressMapping, DefaultMapping} +import vexriscv.Riscv.IMM case class CfuPluginParameter( CFU_VERSION : Int, @@ -78,12 +79,20 @@ case class CfuBus(p : CfuBusParameter) extends Bundle with IMasterSlave{ } } +object CfuPlugin{ + object Input2Kind extends SpinalEnum{ + val RS, IMM_I = newElement() + } +} +case class CfuPluginEncoding(instruction : MaskedLiteral, + functionId : List[Range], + input2Kind : CfuPlugin.Input2Kind.E) class CfuPlugin( val stageCount : Int, val allowZeroLatency : Boolean, - val encoding : MaskedLiteral, - val busParameter : CfuBusParameter) extends Plugin[VexRiscv]{ + val busParameter : CfuBusParameter, + val encodings : 
List[CfuPluginEncoding] = null) extends Plugin[VexRiscv]{ def p = busParameter assert(p.CFU_INPUTS <= 2) @@ -99,7 +108,8 @@ class CfuPlugin( val stageCount : Int, val CFU_ENABLE = new Stageable(Bool()).setCompositeName(this, "CFU_ENABLE") val CFU_IN_FLIGHT = new Stageable(Bool()).setCompositeName(this, "CFU_IN_FLIGHT") - + val CFU_ENCODING = new Stageable(UInt(log2Up(encodings.size) bits)).setCompositeName(this, "CFU_ENCODING") + val CFU_INPUT_2_KIND = new Stageable(CfuPlugin.Input2Kind()).setCompositeName(this, "CFU_ENCODING") override def setup(pipeline: VexRiscv): Unit = { import pipeline._ @@ -111,17 +121,53 @@ class CfuPlugin( val stageCount : Int, val decoderService = pipeline.service(classOf[DecoderService]) decoderService.addDefault(CFU_ENABLE, False) - //custom-0 - decoderService.add(List( - encoding -> List( + for((encoding, id) <- encodings.zipWithIndex){ + var actions = List( CFU_ENABLE -> True, REGFILE_WRITE_VALID -> True, BYPASSABLE_EXECUTE_STAGE -> Bool(stageCount == 0), BYPASSABLE_MEMORY_STAGE -> Bool(stageCount <= 1), RS1_USE -> True, - RS2_USE -> True + CFU_ENCODING -> id, + CFU_INPUT_2_KIND -> encoding.input2Kind() ) - )) + + encoding.input2Kind match { + case CfuPlugin.Input2Kind.RS => + actions :+= RS2_USE -> True + case CfuPlugin.Input2Kind.IMM_I => + } + + decoderService.add( + key = encoding.instruction, + values = actions + ) + } + +// decoderService.add(List( +// //custom-0 +// M"-------------------------0001011" -> List( +// CFU_ENABLE -> True, +// REGFILE_WRITE_VALID -> True, +// BYPASSABLE_EXECUTE_STAGE -> Bool(stageCount == 0), +// BYPASSABLE_MEMORY_STAGE -> Bool(stageCount <= 1), +// RS1_USE -> True, +// RS2_USE -> True, +// CFU_IMM -> False +// ), +// +// //custom-1 +// M"-------------------------0101011" -> List( +// CFU_ENABLE -> True, +// REGFILE_WRITE_VALID -> True, +// BYPASSABLE_EXECUTE_STAGE -> Bool(stageCount == 0), +// BYPASSABLE_MEMORY_STAGE -> Bool(stageCount <= 1), +// RS1_USE -> True, +// CFU_IMM -> True +// ) +// )) + + 
} override def build(pipeline: VexRiscv): Unit = { @@ -139,11 +185,16 @@ class CfuPlugin( val stageCount : Int, bus.cmd.valid := (schedule || hold) && !fired arbitration.haltItself setWhen(bus.cmd.valid && !bus.cmd.ready) - bus.cmd.function_id := U(input(INSTRUCTION)(14 downto 12)).resized +// bus.cmd.function_id := U(input(INSTRUCTION)(14 downto 12)).resized + val functionsIds = encodings.map(e => U(Cat(e.functionId.map(r => input(INSTRUCTION)(r))), busParameter.CFU_FUNCTION_ID_W bits)) + bus.cmd.function_id := functionsIds.read(input(CFU_ENCODING)) bus.cmd.reorder_id := 0 bus.cmd.request_id := 0 if(p.CFU_INPUTS >= 1) bus.cmd.inputs(0) := input(RS1) - if(p.CFU_INPUTS >= 2) bus.cmd.inputs(1) := input(RS2) + if(p.CFU_INPUTS >= 2) bus.cmd.inputs(1) := input(CFU_INPUT_2_KIND).mux( + CfuPlugin.Input2Kind.RS -> input(RS2), + CfuPlugin.Input2Kind.IMM_I -> IMM(input(INSTRUCTION)).i_sext + ) } joinStage plug new Area{ diff --git a/src/main/scala/vexriscv/plugin/CsrPlugin.scala b/src/main/scala/vexriscv/plugin/CsrPlugin.scala index fce4c15..9dfad38 100644 --- a/src/main/scala/vexriscv/plugin/CsrPlugin.scala +++ b/src/main/scala/vexriscv/plugin/CsrPlugin.scala @@ -8,6 +8,7 @@ import vexriscv.plugin.IntAluPlugin.{ALU_BITWISE_CTRL, ALU_CTRL, AluBitwiseCtrlE import scala.collection.mutable.ArrayBuffer import scala.collection.mutable +import spinal.core.sim._ /** * Created by spinalvm on 21.03.17. 
@@ -38,7 +39,7 @@ case class CsrPluginConfig( marchid : BigInt, mimpid : BigInt, mhartid : BigInt, - misaExtensionsInit : Int, + misaExtensionsInit : Int, misaAccess : CsrAccess, mtvecAccess : CsrAccess, mtvecInit : BigInt, @@ -65,8 +66,11 @@ case class CsrPluginConfig( scycleAccess : CsrAccess = CsrAccess.NONE, sinstretAccess : CsrAccess = CsrAccess.NONE, satpAccess : CsrAccess = CsrAccess.NONE, + utimeAccess :CsrAccess = CsrAccess.NONE, medelegAccess : CsrAccess = CsrAccess.NONE, midelegAccess : CsrAccess = CsrAccess.NONE, + withExternalMhartid : Boolean = false, + mhartidWidth : Int = 0, pipelineCsrRead : Boolean = false, pipelinedInterrupt : Boolean = true, csrOhDecoder : Boolean = true, @@ -83,6 +87,46 @@ object CsrPluginConfig{ def all : CsrPluginConfig = all(0x00000020l) def small : CsrPluginConfig = small(0x00000020l) def smallest : CsrPluginConfig = smallest(0x00000020l) + + def openSbi(mhartid : Int, misa : Int) = CsrPluginConfig( + catchIllegalAccess = true, + mvendorid = 0, + marchid = 0, + mimpid = 0, + mhartid = mhartid, + misaExtensionsInit = misa, + misaAccess = CsrAccess.READ_ONLY, + mtvecAccess = CsrAccess.READ_WRITE, //Could have been WRITE_ONLY :( + mtvecInit = null, + mepcAccess = CsrAccess.READ_WRITE, + mscratchGen = true, + mcauseAccess = CsrAccess.READ_ONLY, + mbadaddrAccess = CsrAccess.READ_ONLY, + mcycleAccess = CsrAccess.NONE, + minstretAccess = CsrAccess.NONE, + ucycleAccess = CsrAccess.NONE, + wfiGenAsWait = true, + ecallGen = true, + xtvecModeGen = false, + noCsrAlu = false, + wfiGenAsNop = false, + ebreakGen = false, //TODO + userGen = true, + supervisorGen = true, + sscratchGen = true, + stvecAccess = CsrAccess.READ_WRITE, + sepcAccess = CsrAccess.READ_WRITE, + scauseAccess = CsrAccess.READ_WRITE, + sbadaddrAccess = CsrAccess.READ_WRITE, + scycleAccess = CsrAccess.NONE, + sinstretAccess = CsrAccess.NONE, + satpAccess = CsrAccess.NONE, + medelegAccess = CsrAccess.READ_WRITE, //Could have been WRITE_ONLY :( + midelegAccess = 
CsrAccess.READ_WRITE, //Could have been WRITE_ONLY :( + pipelineCsrRead = false, + deterministicInteruptionEntry = false + ) + def linuxMinimal(mtVecInit : BigInt) = CsrPluginConfig( catchIllegalAccess = true, mvendorid = 1, @@ -346,6 +390,8 @@ class CsrPlugin(val config: CsrPluginConfig) extends Plugin[VexRiscv] with Excep var contextSwitching : Bool = null var thirdPartyWake : Bool = null var inWfi : Bool = null + var externalMhartId : UInt = null + var utime : UInt = null override def askWake(): Unit = thirdPartyWake := True @@ -474,6 +520,9 @@ class CsrPlugin(val config: CsrPluginConfig) extends Plugin[VexRiscv] with Excep pipeline.update(MPP, UInt(2 bits)) + + if(withExternalMhartid) externalMhartId = in UInt(mhartidWidth bits) + if(utimeAccess != CsrAccess.NONE) utime = in UInt(64 bits) setName("utime") } def inhibateInterrupts() : Unit = allowInterrupts := False @@ -559,7 +608,8 @@ class CsrPlugin(val config: CsrPluginConfig) extends Plugin[VexRiscv] with Excep if(mvendorid != null) READ_ONLY(CSR.MVENDORID, U(mvendorid)) if(marchid != null) READ_ONLY(CSR.MARCHID , U(marchid )) if(mimpid != null) READ_ONLY(CSR.MIMPID , U(mimpid )) - if(mhartid != null) READ_ONLY(CSR.MHARTID , U(mhartid )) + if(mhartid != null && !withExternalMhartid) READ_ONLY(CSR.MHARTID , U(mhartid )) + if(withExternalMhartid) READ_ONLY(CSR.MHARTID , externalMhartId) misaAccess(CSR.MISA, xlen-2 -> misa.base , 0 -> misa.extensions) //Machine CSR @@ -587,6 +637,11 @@ class CsrPlugin(val config: CsrPluginConfig) extends Plugin[VexRiscv] with Excep ucycleAccess(CSR.UCYCLE, mcycle(31 downto 0)) ucycleAccess(CSR.UCYCLEH, mcycle(63 downto 32)) + if(utimeAccess != CsrAccess.NONE) { + utimeAccess(CSR.UTIME, utime(31 downto 0)) + utimeAccess(CSR.UTIMEH, utime(63 downto 32)) + } + pipeline(MPP) := mstatus.MPP } @@ -834,7 +889,7 @@ class CsrPlugin(val config: CsrPluginConfig) extends Plugin[VexRiscv] with Excep interruptJump := interrupt.valid && pipelineLiberator.done && allowInterrupts 
if(pipelinedInterrupt) interrupt.valid clearWhen(interruptJump) //avoid double fireing - val hadException = RegNext(exception) init(False) + val hadException = RegNext(exception) init(False) addTag(Verilator.public) pipelineLiberator.done.clearWhen(hadException) diff --git a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala index fd45dd8..0b580d8 100644 --- a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala +++ b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala @@ -26,7 +26,7 @@ class DBusCachedPlugin(val config : DataCacheConfig, relaxedMemoryTranslationRegister : Boolean = false, csrInfo : Boolean = false) extends Plugin[VexRiscv] with DBusAccessService { import config._ - + assert(!(config.withExternalAmo && !dBusRspSlavePipe)) assert(isPow2(cacheSize)) assert(!(memoryTranslatorPortConfig != null && config.cacheSize/config.wayCount > 4096), "When the D$ is used with MMU, each way can't be bigger than a page (4096 bytes)") @@ -49,6 +49,8 @@ class DBusCachedPlugin(val config : DataCacheConfig, object MEMORY_ADDRESS_LOW extends Stageable(UInt(2 bits)) object MEMORY_LRSC extends Stageable(Bool) object MEMORY_AMO extends Stageable(Bool) + object MEMORY_FENCE extends Stageable(Bool) + object MEMORY_FORCE_CONSTISTENCY extends Stageable(Bool) object IS_DBUS_SHARING extends Stageable(Bool()) object MEMORY_VIRTUAL_ADDRESS extends Stageable(UInt(32 bits)) @@ -56,6 +58,8 @@ class DBusCachedPlugin(val config : DataCacheConfig, import Riscv._ import pipeline.config._ + dBus = master(DataCacheMemBus(this.config)).setName("dBus") + val decoderService = pipeline.service(classOf[DecoderService]) val stdActions = List[(Stageable[_ <: BaseType],Any)]( @@ -142,7 +146,13 @@ class DBusCachedPlugin(val config : DataCacheConfig, MEMORY_MANAGMENT -> True )) - decoderService.add(FENCE, Nil) + withWriteResponse match { + case false => decoderService.add(FENCE, Nil) + case true => { + 
decoderService.addDefault(MEMORY_FENCE, False) + decoderService.add(FENCE, List(MEMORY_FENCE -> True)) + } + } mmuBus = pipeline.service(classOf[MemoryTranslator]).newTranslationPort(MemoryTranslatorPort.PRIORITY_DATA ,memoryTranslatorPortConfig) redoBranch = pipeline.service(classOf[JumpService]).createJumpInterface(if(pipeline.writeBack != null) pipeline.writeBack else pipeline.memory) @@ -160,17 +170,42 @@ class DBusCachedPlugin(val config : DataCacheConfig, import pipeline._ import pipeline.config._ - dBus = master(DataCacheMemBus(this.config)).setName("dBus") + val twoStageMmu = mmuBus.p.latency match { + case 0 => false + case 1 => true + } - val cache = new DataCache(this.config.copy( - mergeExecuteMemory = writeBack == null - )) + val cache = new DataCache( + this.config.copy( + mergeExecuteMemory = writeBack == null + ), + mmuParameter = mmuBus.p + ) //Interconnect the plugin dBus with the cache dBus with some optional pipelining def optionPipe[T](cond : Boolean, on : T)(f : T => T) : T = if(cond) f(on) else on def cmdBuf = optionPipe(dBusCmdSlavePipe, cache.io.mem.cmd)(_.s2mPipe()) dBus.cmd << optionPipe(dBusCmdMasterPipe, cmdBuf)(_.m2sPipe()) - cache.io.mem.rsp << optionPipe(dBusRspSlavePipe,dBus.rsp)(_.m2sPipe()) + cache.io.mem.rsp << (dBusRspSlavePipe match { + case false => dBus.rsp + case true if !withExternalAmo => dBus.rsp.m2sPipe() + case true if withExternalAmo => { + val rsp = Flow (DataCacheMemRsp(cache.p)) + rsp.valid := RegNext(dBus.rsp.valid) init(False) + rsp.exclusive := RegNext(dBus.rsp.exclusive) + rsp.error := RegNext(dBus.rsp.error) + rsp.last := RegNext(dBus.rsp.last) + rsp.aggregated := RegNext(dBus.rsp.aggregated) + rsp.data := RegNextWhen(dBus.rsp.data, dBus.rsp.valid && !cache.io.cpu.writeBack.keepMemRspData) + rsp + } + }) + + if(withInvalidate) { + cache.io.mem.inv << dBus.inv + cache.io.mem.ack >> dBus.ack + cache.io.mem.sync << dBus.sync + } pipeline plug new Area{ //Memory bandwidth counter @@ -186,6 +221,16 @@ class 
DBusCachedPlugin(val config : DataCacheConfig, when(mmuBus.busy && arbitration.isValid && input(MEMORY_ENABLE)) { arbitration.haltItself := True } + + + //Manage write to read hit ordering (ensure invalidation timings) + val fence = new Area { + insert(MEMORY_FORCE_CONSTISTENCY) := False + when(input(INSTRUCTION)(25)) { //RL + if (withLrSc) insert(MEMORY_FORCE_CONSTISTENCY) setWhen (input(MEMORY_LRSC)) + if (withAmo) insert(MEMORY_FORCE_CONSTISTENCY) setWhen (input(MEMORY_AMO)) + } + } } execute plug new Area { @@ -202,9 +247,16 @@ class DBusCachedPlugin(val config : DataCacheConfig, ) cache.io.cpu.execute.args.size := size + if(twoStageMmu) { + mmuBus.cmd(0).isValid := cache.io.cpu.execute.isValid + mmuBus.cmd(0).isStuck := arbitration.isStuck + mmuBus.cmd(0).virtualAddress := cache.io.cpu.execute.address + mmuBus.cmd(0).bypassTranslation := False + } cache.io.cpu.flush.valid := arbitration.isValid && input(MEMORY_MANAGMENT) - arbitration.haltItself setWhen(cache.io.cpu.flush.isStall) + cache.io.cpu.execute.args.totalyConsistent := input(MEMORY_FORCE_CONSTISTENCY) + arbitration.haltItself setWhen(cache.io.cpu.flush.isStall || cache.io.cpu.execute.haltIt) if(withLrSc) { cache.io.cpu.execute.args.isLrsc := False @@ -240,11 +292,15 @@ class DBusCachedPlugin(val config : DataCacheConfig, cache.io.cpu.memory.isValid := arbitration.isValid && input(MEMORY_ENABLE) cache.io.cpu.memory.isStuck := arbitration.isStuck - cache.io.cpu.memory.isRemoved := arbitration.removeIt cache.io.cpu.memory.address := (if(relaxedMemoryTranslationRegister) input(MEMORY_VIRTUAL_ADDRESS) else if(mmuAndBufferStage == execute) cache.io.cpu.execute.address else U(input(REGFILE_WRITE_DATA))) - cache.io.cpu.memory.mmuBus <> mmuBus - cache.io.cpu.memory.mmuBus.rsp.isIoAccess setWhen(pipeline(DEBUG_BYPASS_CACHE) && !cache.io.cpu.memory.isWrite) + mmuBus.cmd.last.isValid := cache.io.cpu.memory.isValid + mmuBus.cmd.last.isStuck := cache.io.cpu.memory.isStuck + mmuBus.cmd.last.virtualAddress := 
cache.io.cpu.memory.address + mmuBus.cmd.last.bypassTranslation := False + mmuBus.end := !arbitration.isStuck || arbitration.removeIt + cache.io.cpu.memory.mmuRsp := mmuBus.rsp + cache.io.cpu.memory.mmuRsp.isIoAccess setWhen(pipeline(DEBUG_BYPASS_CACHE) && !cache.io.cpu.memory.isWrite) } val managementStage = stages.last @@ -254,7 +310,31 @@ class DBusCachedPlugin(val config : DataCacheConfig, cache.io.cpu.writeBack.isStuck := arbitration.isStuck cache.io.cpu.writeBack.isUser := (if(privilegeService != null) privilegeService.isUser() else False) cache.io.cpu.writeBack.address := U(input(REGFILE_WRITE_DATA)) - if(withLrSc) cache.io.cpu.writeBack.clearLrsc := service(classOf[IContextSwitching]).isContextSwitching + + val fence = if(withInvalidate) { + cache.io.cpu.writeBack.fence := input(INSTRUCTION)(31 downto 20).as(FenceFlags()) + val aquire = False + if(withWriteResponse) when(input(INSTRUCTION)(26)) { //AQ + if(withLrSc) when(input(MEMORY_LRSC)){ + aquire := True + } + if(withAmo) when(input(MEMORY_AMO)){ + aquire := True + } + } + + when(aquire){ + cache.io.cpu.writeBack.fence.forceAll() + } + + when(!input(MEMORY_FENCE) || !arbitration.isFiring){ + cache.io.cpu.writeBack.fence.clearAll() + } + + when(arbitration.isValid && (input(MEMORY_FENCE) || aquire)){ + mmuAndBufferStage.arbitration.haltByOther := True //Ensure that the fence affect the memory stage instruction by stoping it + } + } redoBranch.valid := False redoBranch.payload := input(PC) @@ -332,9 +412,9 @@ class DBusCachedPlugin(val config : DataCacheConfig, } } execute.insert(IS_DBUS_SHARING) := dBusAccess.cmd.fire + mmuBus.cmd.last.bypassTranslation setWhen(mmuAndBufferStage.input(IS_DBUS_SHARING)) + if(twoStageMmu) mmuBus.cmd(0).bypassTranslation setWhen(execute.input(IS_DBUS_SHARING)) - - mmuBus.cmd.bypassTranslation setWhen(mmuAndBufferStage.input(IS_DBUS_SHARING)) if(mmuAndBufferStage != execute) (cache.io.cpu.memory.isValid setWhen(mmuAndBufferStage.input(IS_DBUS_SHARING))) 
cache.io.cpu.writeBack.isValid setWhen(managementStage.input(IS_DBUS_SHARING)) dBusAccess.rsp.valid := managementStage.input(IS_DBUS_SHARING) && !cache.io.cpu.writeBack.isWrite && (cache.io.cpu.redo || !cache.io.cpu.writeBack.haltIt) diff --git a/src/main/scala/vexriscv/plugin/DBusSimplePlugin.scala b/src/main/scala/vexriscv/plugin/DBusSimplePlugin.scala index e08b640..ba896fa 100644 --- a/src/main/scala/vexriscv/plugin/DBusSimplePlugin.scala +++ b/src/main/scala/vexriscv/plugin/DBusSimplePlugin.scala @@ -298,7 +298,6 @@ class DBusSimplePlugin(catchAddressMisaligned : Boolean = false, object MEMORY_ADDRESS_LOW extends Stageable(UInt(2 bits)) object ALIGNEMENT_FAULT extends Stageable(Bool) object MMU_FAULT extends Stageable(Bool) - object MMU_RSP extends Stageable(MemoryTranslatorRsp()) object MEMORY_ATOMIC extends Stageable(Bool) object ATOMIC_HIT extends Stageable(Bool) object MEMORY_STORE extends Stageable(Bool) @@ -393,6 +392,8 @@ class DBusSimplePlugin(catchAddressMisaligned : Boolean = false, import pipeline._ import pipeline.config._ + object MMU_RSP extends Stageable(MemoryTranslatorRsp(mmuBus.p)) + dBus = master(DBusSimpleBus()).setName("dBus") @@ -448,9 +449,10 @@ class DBusSimplePlugin(catchAddressMisaligned : Boolean = false, insert(FORMAL_MEM_WDATA) := dBus.cmd.payload.data val mmu = (mmuBus != null) generate new Area { - mmuBus.cmd.isValid := arbitration.isValid && input(MEMORY_ENABLE) - mmuBus.cmd.virtualAddress := input(SRC_ADD).asUInt - mmuBus.cmd.bypassTranslation := False + mmuBus.cmd.last.isValid := arbitration.isValid && input(MEMORY_ENABLE) + mmuBus.cmd.last.isStuck := arbitration.isStuck + mmuBus.cmd.last.virtualAddress := input(SRC_ADD).asUInt + mmuBus.cmd.last.bypassTranslation := False mmuBus.end := !arbitration.isStuck || arbitration.isRemoved dBus.cmd.address := mmuBus.rsp.physicalAddress @@ -469,13 +471,9 @@ class DBusSimplePlugin(catchAddressMisaligned : Boolean = false, val atomic = withLrSc generate new Area{ val reserved = 
RegInit(False) insert(ATOMIC_HIT) := reserved - when(arbitration.isFiring && input(MEMORY_ENABLE) && input(MEMORY_ATOMIC) && !input(MEMORY_STORE)){ - reserved := True + when(arbitration.isFiring && input(MEMORY_ENABLE) && input(MEMORY_ATOMIC) && (if(mmuBus != null) !input(MMU_FAULT) else True) && !skipCmd){ + reserved := !input(MEMORY_STORE) } - when(service(classOf[IContextSwitching]).isContextSwitching){ - reserved := False - } - when(input(MEMORY_STORE) && input(MEMORY_ATOMIC) && !input(ATOMIC_HIT)){ skipCmd := True } diff --git a/src/main/scala/vexriscv/plugin/DebugPlugin.scala b/src/main/scala/vexriscv/plugin/DebugPlugin.scala index f38e3bf..4797e21 100644 --- a/src/main/scala/vexriscv/plugin/DebugPlugin.scala +++ b/src/main/scala/vexriscv/plugin/DebugPlugin.scala @@ -9,6 +9,8 @@ import spinal.core._ import spinal.lib._ import spinal.lib.bus.amba3.apb.{Apb3, Apb3Config} import spinal.lib.bus.avalon.{AvalonMM, AvalonMMConfig} +import spinal.lib.bus.bmb.{Bmb, BmbAccessParameter, BmbParameter} +import spinal.lib.bus.simple.PipelinedMemoryBus import scala.collection.mutable.ArrayBuffer @@ -22,6 +24,16 @@ case class DebugExtensionRsp() extends Bundle{ val data = Bits(32 bit) } +object DebugExtensionBus{ + def getBmbAccessParameter(source : BmbAccessParameter) = BmbAccessParameter( + addressWidth = 8, + dataWidth = 32, + lengthWidth = 2, + sourceWidth = source.sourceWidth, + contextWidth = source.contextWidth + ) +} + case class DebugExtensionBus() extends Bundle with IMasterSlave{ val cmd = Stream(DebugExtensionCmd()) val rsp = DebugExtensionRsp() //one cycle latency @@ -63,6 +75,42 @@ case class DebugExtensionBus() extends Bundle with IMasterSlave{ bus } + def fromPipelinedMemoryBus(): PipelinedMemoryBus ={ + val bus = PipelinedMemoryBus(32, 32) + + cmd.arbitrationFrom(bus.cmd) + cmd.wr := bus.cmd.write + cmd.address := bus.cmd.address.resized + cmd.data := bus.cmd.data + + bus.rsp.valid := RegNext(cmd.fire) init(False) + bus.rsp.data := rsp.data + + bus + } + + 
def fromBmb(): Bmb ={ + val bus = Bmb(BmbParameter( + addressWidth = 8, + dataWidth = 32, + lengthWidth = 2, + sourceWidth = 0, + contextWidth = 0 + )) + + cmd.arbitrationFrom(bus.cmd) + cmd.wr := bus.cmd.isWrite + cmd.address := bus.cmd.address + cmd.data := bus.cmd.data + + bus.rsp.valid := RegNext(cmd.fire) init(False) + bus.rsp.data := rsp.data + bus.rsp.last := True + bus.rsp.setSuccess() + + bus + } + def from(c : SystemDebuggerConfig) : SystemDebuggerMemBus = { val mem = SystemDebuggerMemBus(c) cmd.valid := mem.cmd.valid @@ -116,7 +164,7 @@ case class DebugExtensionIo() extends Bundle with IMasterSlave{ -class DebugPlugin(val debugClockDomain : ClockDomain, hardwareBreakpointCount : Int = 0) extends Plugin[VexRiscv] { +class DebugPlugin(var debugClockDomain : ClockDomain, hardwareBreakpointCount : Int = 0) extends Plugin[VexRiscv] { var io : DebugExtensionIo = null val injectionAsks = ArrayBuffer[(Stage, Bool)]() diff --git a/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala b/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala index 642fcbb..e23cec7 100644 --- a/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala +++ b/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala @@ -124,7 +124,7 @@ class IBusCachedPlugin(resetVector : BigInt = 0x80000000l, import pipeline.config._ pipeline plug new FetchArea(pipeline) { - val cache = new InstructionCache(IBusCachedPlugin.this.config.copy(bypassGen = tightlyGen)) + val cache = new InstructionCache(IBusCachedPlugin.this.config.copy(bypassGen = tightlyGen), if(mmuBus != null) mmuBus.p else MemoryTranslatorBusParameter(0,0)) iBus = master(new InstructionCacheMemBus(IBusCachedPlugin.this.config)).setName("iBus") iBus <> cache.io.mem iBus.cmd.address.allowOverride := cache.io.mem.cmd.address @@ -155,8 +155,13 @@ class IBusCachedPlugin(resetVector : BigInt = 0x80000000l, cache.io.cpu.prefetch.pc := stages(0).input.payload stages(0).halt setWhen (cache.io.cpu.prefetch.haltIt) - - cache.io.cpu.fetch.isRemoved := 
externalFlush + if(mmuBus != null && mmuBus.p.latency == 1) { + stages(0).halt setWhen(mmuBus.busy) + mmuBus.cmd(0).isValid := cache.io.cpu.prefetch.isValid + mmuBus.cmd(0).isStuck := !stages(0).input.ready + mmuBus.cmd(0).virtualAddress := cache.io.cpu.prefetch.pc + mmuBus.cmd(0).bypassTranslation := False + } } @@ -172,8 +177,15 @@ class IBusCachedPlugin(resetVector : BigInt = 0x80000000l, cache.io.cpu.fetch.isStuck := !stages(1).input.ready cache.io.cpu.fetch.pc := stages(1).input.payload + if(mmuBus != null) { + mmuBus.cmd.last.isValid := cache.io.cpu.fetch.isValid + mmuBus.cmd.last.isStuck := !stages(1).input.ready + mmuBus.cmd.last.virtualAddress := cache.io.cpu.fetch.pc + mmuBus.cmd.last.bypassTranslation := False + mmuBus.end := stages(1).input.ready || externalFlush + if (mmuBus.p.latency == 0) stages(1).halt setWhen (mmuBus.busy) + } - stages(1).halt setWhen(cache.io.cpu.fetch.haltIt) if (!twoCycleCache) { cache.io.cpu.fetch.isUser := privilegeService.isUser() @@ -249,16 +261,15 @@ class IBusCachedPlugin(resetVector : BigInt = 0x80000000l, } if (mmuBus != null) { - cache.io.cpu.fetch.mmuBus <> mmuBus + cache.io.cpu.fetch.mmuRsp <> mmuBus.rsp } else { - cache.io.cpu.fetch.mmuBus.rsp.physicalAddress := cache.io.cpu.fetch.mmuBus.cmd.virtualAddress - cache.io.cpu.fetch.mmuBus.rsp.allowExecute := True - cache.io.cpu.fetch.mmuBus.rsp.allowRead := True - cache.io.cpu.fetch.mmuBus.rsp.allowWrite := True - cache.io.cpu.fetch.mmuBus.rsp.isIoAccess := False - cache.io.cpu.fetch.mmuBus.rsp.exception := False - cache.io.cpu.fetch.mmuBus.rsp.refilling := False - cache.io.cpu.fetch.mmuBus.busy := False + cache.io.cpu.fetch.mmuRsp.physicalAddress := cache.io.cpu.fetch.pc + cache.io.cpu.fetch.mmuRsp.allowExecute := True + cache.io.cpu.fetch.mmuRsp.allowRead := True + cache.io.cpu.fetch.mmuRsp.allowWrite := True + cache.io.cpu.fetch.mmuRsp.isIoAccess := False + cache.io.cpu.fetch.mmuRsp.exception := False + cache.io.cpu.fetch.mmuRsp.refilling := False } val flushStage = 
decode diff --git a/src/main/scala/vexriscv/plugin/IBusSimplePlugin.scala b/src/main/scala/vexriscv/plugin/IBusSimplePlugin.scala index b8bc978..19145f5 100644 --- a/src/main/scala/vexriscv/plugin/IBusSimplePlugin.scala +++ b/src/main/scala/vexriscv/plugin/IBusSimplePlugin.scala @@ -318,9 +318,9 @@ class IBusSimplePlugin( resetVector : BigInt, } val mmu = (mmuBus != null) generate new Area { - mmuBus.cmd.isValid := cmdForkStage.input.valid - mmuBus.cmd.virtualAddress := cmdForkStage.input.payload - mmuBus.cmd.bypassTranslation := False + mmuBus.cmd.last.isValid := cmdForkStage.input.valid + mmuBus.cmd.last.virtualAddress := cmdForkStage.input.payload + mmuBus.cmd.last.bypassTranslation := False mmuBus.end := cmdForkStage.output.fire || externalFlush cmd.pc := mmuBus.rsp.physicalAddress(31 downto 2) @@ U"00" diff --git a/src/main/scala/vexriscv/plugin/MemoryTranslatorPlugin.scala b/src/main/scala/vexriscv/plugin/MemoryTranslatorPlugin.scala index 623d872..081b11d 100644 --- a/src/main/scala/vexriscv/plugin/MemoryTranslatorPlugin.scala +++ b/src/main/scala/vexriscv/plugin/MemoryTranslatorPlugin.scala @@ -22,8 +22,8 @@ class MemoryTranslatorPlugin(tlbSize : Int, val portsInfo = ArrayBuffer[MemoryTranslatorPort]() override def newTranslationPort(priority : Int,args : Any): MemoryTranslatorBus = { -// val exceptionBus = pipeline.service(classOf[ExceptionService]).newExceptionPort(stage) - val port = MemoryTranslatorPort(MemoryTranslatorBus(),priority,args.asInstanceOf[MemoryTranslatorPortConfig]/*,exceptionBus*/) + val config = args.asInstanceOf[MemoryTranslatorPortConfig] + val port = MemoryTranslatorPort(MemoryTranslatorBus(MemoryTranslatorBusParameter(wayCount = 0)),priority, config/*,exceptionBus*/) portsInfo += port port.bus } @@ -70,17 +70,17 @@ class MemoryTranslatorPlugin(tlbSize : Int, val ports = for ((port, portId) <- sortedPortsInfo.zipWithIndex) yield new Area { val cache = Vec(Reg(CacheLine()) init, port.args.portTlbSize) - val cacheHits = cache.map(line 
=> line.valid && line.virtualAddress === port.bus.cmd.virtualAddress(31 downto 12)) + val cacheHits = cache.map(line => line.valid && line.virtualAddress === port.bus.cmd.last.virtualAddress(31 downto 12)) val cacheHit = cacheHits.asBits.orR val cacheLine = MuxOH(cacheHits, cache) - val isInMmuRange = virtualRange(port.bus.cmd.virtualAddress) && !port.bus.cmd.bypassTranslation + val isInMmuRange = virtualRange(port.bus.cmd.last.virtualAddress) && !port.bus.cmd.last.bypassTranslation val sharedMiss = RegInit(False) val sharedIterator = Reg(UInt(log2Up(tlbSize + 1) bits)) val sharedAccessed = RegInit(B"00") val entryToReplace = Counter(port.args.portTlbSize) - val sharedAccessAsked = RegNext(port.bus.cmd.isValid && !cacheHit && sharedIterator < tlbSize && isInMmuRange) + val sharedAccessAsked = RegNext(port.bus.cmd.last.isValid && !cacheHit && sharedIterator < tlbSize && isInMmuRange) val sharedAccessGranted = sharedAccessAsked && shared.free when(sharedAccessGranted) { shared.readAddr := sharedIterator.resized @@ -92,7 +92,7 @@ class MemoryTranslatorPlugin(tlbSize : Int, } when(sharedAccessed.msb){ - when(shared.readData.virtualAddress === port.bus.cmd.virtualAddress(31 downto 12)){ + when(shared.readData.virtualAddress === port.bus.cmd.last.virtualAddress(31 downto 12)){ cache(entryToReplace) := shared.readData entryToReplace.increment() } @@ -108,7 +108,7 @@ class MemoryTranslatorPlugin(tlbSize : Int, when(isInMmuRange) { - port.bus.rsp.physicalAddress := cacheLine.physicalAddress @@ port.bus.cmd.virtualAddress(11 downto 0) + port.bus.rsp.physicalAddress := cacheLine.physicalAddress @@ port.bus.cmd.last.virtualAddress(11 downto 0) port.bus.rsp.allowRead := cacheLine.allowRead port.bus.rsp.allowWrite := cacheLine.allowWrite port.bus.rsp.allowExecute := cacheLine.allowExecute @@ -116,7 +116,7 @@ class MemoryTranslatorPlugin(tlbSize : Int, // port.bus.rsp.hit := cacheHit // port.stage.arbitration.haltItself setWhen (port.bus.cmd.isValid && !cacheHit && !sharedMiss) } 
otherwise { - port.bus.rsp.physicalAddress := port.bus.cmd.virtualAddress + port.bus.rsp.physicalAddress := port.bus.cmd.last.virtualAddress port.bus.rsp.allowRead := True port.bus.rsp.allowWrite := True port.bus.rsp.allowExecute := True diff --git a/src/main/scala/vexriscv/plugin/MmuPlugin.scala b/src/main/scala/vexriscv/plugin/MmuPlugin.scala index 9dedde5..e797bcf 100644 --- a/src/main/scala/vexriscv/plugin/MmuPlugin.scala +++ b/src/main/scala/vexriscv/plugin/MmuPlugin.scala @@ -34,9 +34,9 @@ object MmuPort{ val PRIORITY_DATA = 1 val PRIORITY_INSTRUCTION = 0 } -case class MmuPort(bus : MemoryTranslatorBus, priority : Int, args : MmuPortConfig, id : Int/*, exceptionBus: Flow[ExceptionCause]*/) +case class MmuPort(bus : MemoryTranslatorBus, priority : Int, args : MmuPortConfig, id : Int) -case class MmuPortConfig(portTlbSize : Int) +case class MmuPortConfig(portTlbSize : Int, latency : Int = 0, earlyRequireMmuLockup : Boolean = false, earlyCacheHits : Boolean = false) class MmuPlugin(ioRange : UInt => Bool, virtualRange : UInt => Bool = address => True, @@ -47,7 +47,8 @@ class MmuPlugin(ioRange : UInt => Bool, val portsInfo = ArrayBuffer[MmuPort]() override def newTranslationPort(priority : Int,args : Any): MemoryTranslatorBus = { - val port = MmuPort(MemoryTranslatorBus(),priority,args.asInstanceOf[MmuPortConfig], portsInfo.length) + val config = args.asInstanceOf[MmuPortConfig] + val port = MmuPort(MemoryTranslatorBus(MemoryTranslatorBusParameter(wayCount = config.portTlbSize, latency = config.latency)),priority, config, portsInfo.length) portsInfo += port port.bus } @@ -71,7 +72,7 @@ class MmuPlugin(ioRange : UInt => Bool, val csrService = pipeline.service(classOf[CsrInterface]) //Sorted by priority - val sortedPortsInfo = portsInfo.sortWith((a,b) => a.priority > b.priority) + val sortedPortsInfo = portsInfo.sortBy(_.priority) case class CacheLine() extends Bundle { val valid, exception, superPage = Bool @@ -102,33 +103,51 @@ class MmuPlugin(ioRange : UInt => 
Bool, val ports = for (port <- sortedPortsInfo) yield new Area { val handle = port val id = port.id - val cache = Vec(Reg(CacheLine()) init, port.args.portTlbSize) - val cacheHits = cache.map(line => line.valid && line.virtualAddress(1) === port.bus.cmd.virtualAddress(31 downto 22) && (line.superPage || line.virtualAddress(0) === port.bus.cmd.virtualAddress(21 downto 12))) - val cacheHit = cacheHits.asBits.orR - val cacheLine = MuxOH(cacheHits, cache) val privilegeService = pipeline.serviceElse(classOf[PrivilegeService], PrivilegeServiceDefault()) - val entryToReplace = Counter(port.args.portTlbSize) - val requireMmuLockup = virtualRange(port.bus.cmd.virtualAddress) && !port.bus.cmd.bypassTranslation && csr.satp.mode + val cache = Vec(Reg(CacheLine()) init, port.args.portTlbSize) + + def toRsp[T <: Data](data : T, from : MemoryTranslatorCmd) : T = from match { + case _ if from == port.bus.cmd.last => data + case _ => { + val next = port.bus.cmd.dropWhile(_ != from)(1) + toRsp(RegNextWhen(data, !next.isStuck), next) + } + } + val requireMmuLockupCmd = port.bus.cmd.takeRight(if(port.args.earlyRequireMmuLockup) 2 else 1).head + + val requireMmuLockupCalc = virtualRange(requireMmuLockupCmd.virtualAddress) && !requireMmuLockupCmd.bypassTranslation && csr.satp.mode if(!enableMmuInMachineMode) { - requireMmuLockup clearWhen(!csr.status.mprv && privilegeService.isMachine()) + requireMmuLockupCalc clearWhen(!csr.status.mprv && privilegeService.isMachine()) when(privilegeService.isMachine()) { if (port.priority == MmuPort.PRIORITY_DATA) { - requireMmuLockup clearWhen (!csr.status.mprv || pipeline(MPP) === 3) + requireMmuLockupCalc clearWhen (!csr.status.mprv || pipeline(MPP) === 3) } else { - requireMmuLockup := False + requireMmuLockupCalc := False } } } + val cacheHitsCmd = port.bus.cmd.takeRight(if(port.args.earlyCacheHits) 2 else 1).head + val cacheHitsCalc = B(cache.map(line => line.valid && line.virtualAddress(1) === cacheHitsCmd.virtualAddress(31 downto 22) && 
(line.superPage || line.virtualAddress(0) === cacheHitsCmd.virtualAddress(21 downto 12)))) + + + val requireMmuLockup = toRsp(requireMmuLockupCalc, requireMmuLockupCmd) + val cacheHits = toRsp(cacheHitsCalc, cacheHitsCmd) + + val cacheHit = cacheHits.asBits.orR + val cacheLine = MuxOH(cacheHits, cache) + val entryToReplace = Counter(port.args.portTlbSize) + + when(requireMmuLockup) { - port.bus.rsp.physicalAddress := cacheLine.physicalAddress(1) @@ (cacheLine.superPage ? port.bus.cmd.virtualAddress(21 downto 12) | cacheLine.physicalAddress(0)) @@ port.bus.cmd.virtualAddress(11 downto 0) + port.bus.rsp.physicalAddress := cacheLine.physicalAddress(1) @@ (cacheLine.superPage ? port.bus.cmd.last.virtualAddress(21 downto 12) | cacheLine.physicalAddress(0)) @@ port.bus.cmd.last.virtualAddress(11 downto 0) port.bus.rsp.allowRead := cacheLine.allowRead || csr.status.mxr && cacheLine.allowExecute port.bus.rsp.allowWrite := cacheLine.allowWrite port.bus.rsp.allowExecute := cacheLine.allowExecute port.bus.rsp.exception := cacheHit && (cacheLine.exception || cacheLine.allowUser && privilegeService.isSupervisor() && !csr.status.sum || !cacheLine.allowUser && privilegeService.isUser()) port.bus.rsp.refilling := !cacheHit } otherwise { - port.bus.rsp.physicalAddress := port.bus.cmd.virtualAddress + port.bus.rsp.physicalAddress := port.bus.cmd.last.virtualAddress port.bus.rsp.allowRead := True port.bus.rsp.allowWrite := True port.bus.rsp.allowExecute := True @@ -137,6 +156,12 @@ class MmuPlugin(ioRange : UInt => Bool, } port.bus.rsp.isIoAccess := ioRange(port.bus.rsp.physicalAddress) + port.bus.rsp.bypassTranslation := !requireMmuLockup + for(wayId <- 0 until port.args.portTlbSize){ + port.bus.rsp.ways(wayId).sel := cacheHits(wayId) + port.bus.rsp.ways(wayId).physical := cache(wayId).physicalAddress(1) @@ (cache(wayId).superPage ? 
port.bus.cmd.last.virtualAddress(21 downto 12) | cache(wayId).physicalAddress(0)) @@ port.bus.cmd.last.virtualAddress(11 downto 0) + } + // Avoid keeping any invalid line in the cache after an exception. // https://github.com/riscv/riscv-linux/blob/8fe28cb58bcb235034b64cbbb7550a8a43fd88be/arch/riscv/include/asm/pgtable.h#L276 when(service(classOf[IContextSwitching]).isContextSwitching) { @@ -154,21 +179,23 @@ class MmuPlugin(ioRange : UInt => Bool, } val state = RegInit(State.IDLE) val vpn = Reg(Vec(UInt(10 bits), UInt(10 bits))) - val portId = Reg(UInt(log2Up(portsInfo.length) bits)) + val portSortedOh = Reg(Bits(portsInfo.length bits)) case class PTE() extends Bundle { val V, R, W ,X, U, G, A, D = Bool() val RSW = Bits(2 bits) val PPN0 = UInt(10 bits) val PPN1 = UInt(12 bits) } + + val dBusRspStaged = dBusAccess.rsp.stage() val dBusRsp = new Area{ val pte = PTE() - pte.assignFromBits(dBusAccess.rsp.data) - val exception = !pte.V || (!pte.R && pte.W) || dBusAccess.rsp.error + pte.assignFromBits(dBusRspStaged.data) + val exception = !pte.V || (!pte.R && pte.W) || dBusRspStaged.error val leaf = pte.R || pte.X } - val pteBuffer = RegNextWhen(dBusRsp.pte, dBusAccess.rsp.valid && !dBusAccess.rsp.redo) + val pteBuffer = RegNextWhen(dBusRsp.pte, dBusRspStaged.valid && !dBusRspStaged.redo) dBusAccess.cmd.valid := False dBusAccess.cmd.write := False @@ -176,16 +203,25 @@ class MmuPlugin(ioRange : UInt => Bool, dBusAccess.cmd.address.assignDontCare() dBusAccess.cmd.data.assignDontCare() dBusAccess.cmd.writeMask.assignDontCare() + + val refills = OHMasking.last(B(sortedPortsInfo.map(port => port.bus.cmd.last.isValid && port.bus.rsp.refilling))) switch(state){ is(State.IDLE){ - for(port <- portsInfo.sortBy(_.priority)){ - when(port.bus.cmd.isValid && port.bus.rsp.refilling){ - vpn(1) := port.bus.cmd.virtualAddress(31 downto 22) - vpn(0) := port.bus.cmd.virtualAddress(21 downto 12) - portId := port.id - state := State.L1_CMD - } + when(refills.orR){ + portSortedOh := refills + 
state := State.L1_CMD + val address = MuxOH(refills, sortedPortsInfo.map(_.bus.cmd.last.virtualAddress)) + vpn(1) := address(31 downto 22) + vpn(0) := address(21 downto 12) } +// for(port <- portsInfo.sortBy(_.priority)){ +// when(port.bus.cmd.isValid && port.bus.rsp.refilling){ +// vpn(1) := port.bus.cmd.virtualAddress(31 downto 22) +// vpn(0) := port.bus.cmd.virtualAddress(21 downto 12) +// portId := port.id +// state := State.L1_CMD +// } +// } } is(State.L1_CMD){ dBusAccess.cmd.valid := True @@ -195,12 +231,12 @@ class MmuPlugin(ioRange : UInt => Bool, } } is(State.L1_RSP){ - when(dBusAccess.rsp.valid){ + when(dBusRspStaged.valid){ state := State.L0_CMD when(dBusRsp.leaf || dBusRsp.exception){ state := State.IDLE } - when(dBusAccess.rsp.redo){ + when(dBusRspStaged.redo){ state := State.L1_CMD } } @@ -213,22 +249,22 @@ class MmuPlugin(ioRange : UInt => Bool, } } is(State.L0_RSP){ - when(dBusAccess.rsp.valid) { + when(dBusRspStaged.valid) { state := State.IDLE - when(dBusAccess.rsp.redo){ + when(dBusRspStaged.redo){ state := State.L0_CMD } } } } - for(port <- ports) { - port.handle.bus.busy := state =/= State.IDLE && portId === port.id + for((port, id) <- sortedPortsInfo.zipWithIndex) { + port.bus.busy := state =/= State.IDLE && portSortedOh(id) } - when(dBusAccess.rsp.valid && !dBusAccess.rsp.redo && (dBusRsp.leaf || dBusRsp.exception)){ - for(port <- ports){ - when(portId === port.id) { + when(dBusRspStaged.valid && !dBusRspStaged.redo && (dBusRsp.leaf || dBusRsp.exception)){ + for((port, id) <- ports.zipWithIndex) { + when(portSortedOh(id)) { port.entryToReplace.increment() for ((line, lineId) <- port.cache.zipWithIndex) { when(port.entryToReplace === lineId){ diff --git a/src/main/scala/vexriscv/plugin/SingleInstructionLimiterPlugin.scala b/src/main/scala/vexriscv/plugin/SingleInstructionLimiterPlugin.scala index deae767..c6c9706 100644 --- a/src/main/scala/vexriscv/plugin/SingleInstructionLimiterPlugin.scala +++ 
b/src/main/scala/vexriscv/plugin/SingleInstructionLimiterPlugin.scala @@ -9,7 +9,9 @@ class SingleInstructionLimiterPlugin() extends Plugin[VexRiscv] { override def build(pipeline: VexRiscv): Unit = { import pipeline._ import pipeline.config._ - - decode.arbitration.haltByOther.setWhen(List(decode,execute,memory,writeBack).map(_.arbitration.isValid).orR) + val fetcher = pipeline.service(classOf[IBusFetcher]) + when(fetcher.incoming() || List(decode,execute,memory,writeBack).map(_.arbitration.isValid).orR) { + fetcher.haltIt() + } } } diff --git a/src/main/scala/vexriscv/plugin/StaticMemoryTranslatorPlugin.scala b/src/main/scala/vexriscv/plugin/StaticMemoryTranslatorPlugin.scala index 351ebc5..bc910c0 100644 --- a/src/main/scala/vexriscv/plugin/StaticMemoryTranslatorPlugin.scala +++ b/src/main/scala/vexriscv/plugin/StaticMemoryTranslatorPlugin.scala @@ -11,8 +11,7 @@ class StaticMemoryTranslatorPlugin(ioRange : UInt => Bool) extends Plugin[VexRis val portsInfo = ArrayBuffer[StaticMemoryTranslatorPort]() override def newTranslationPort(priority : Int,args : Any): MemoryTranslatorBus = { -// val exceptionBus = pipeline.service(classOf[ExceptionService]).newExceptionPort(stage) - val port = StaticMemoryTranslatorPort(MemoryTranslatorBus(),priority) + val port = StaticMemoryTranslatorPort(MemoryTranslatorBus(MemoryTranslatorBusParameter(wayCount = 0)),priority) portsInfo += port port.bus } @@ -27,7 +26,7 @@ class StaticMemoryTranslatorPlugin(ioRange : UInt => Bool) extends Plugin[VexRis val core = pipeline plug new Area { val ports = for ((port, portId) <- portsInfo.zipWithIndex) yield new Area { - port.bus.rsp.physicalAddress := port.bus.cmd.virtualAddress + port.bus.rsp.physicalAddress := port.bus.cmd.last.virtualAddress port.bus.rsp.allowRead := True port.bus.rsp.allowWrite := True port.bus.rsp.allowExecute := True diff --git a/src/test/cpp/raw/common/asm.mk b/src/test/cpp/raw/common/asm.mk index 3d4b205..b63c80a 100644 --- a/src/test/cpp/raw/common/asm.mk +++ 
b/src/test/cpp/raw/common/asm.mk @@ -40,7 +40,7 @@ OBJS := $(addprefix $(OBJDIR)/,$(OBJS)) -all: $(OBJDIR)/$(PROJ_NAME).elf $(OBJDIR)/$(PROJ_NAME).hex $(OBJDIR)/$(PROJ_NAME).asm +all: $(OBJDIR)/$(PROJ_NAME).elf $(OBJDIR)/$(PROJ_NAME).hex $(OBJDIR)/$(PROJ_NAME).asm $(OBJDIR)/$(PROJ_NAME).bin @echo "done" $(OBJDIR)/%.elf: $(OBJS) | $(OBJDIR) diff --git a/src/test/cpp/raw/lrsc/build/lrsc.asm b/src/test/cpp/raw/lrsc/build/lrsc.asm index 95b4751..a2ba4c7 100644 --- a/src/test/cpp/raw/lrsc/build/lrsc.asm +++ b/src/test/cpp/raw/lrsc/build/lrsc.asm @@ -5,7 +5,7 @@ build/lrsc.elf: file format elf32-littleriscv Disassembly of section .crt_section: 80000000 : -80000000: 04c0006f j 8000004c <_start> +80000000: 06c0006f j 8000006c <_start> 80000004: 00000013 nop 80000008: 00000013 nop 8000000c: 00000013 nop @@ -29,189 +29,152 @@ Disassembly of section .crt_section: 80000044: 341e9073 csrw mepc,t4 80000048: 30200073 mret -8000004c <_start>: -8000004c: 00100e13 li t3,1 -80000050: 10000537 lui a0,0x10000 -80000054: 06400593 li a1,100 -80000058: 06500613 li a2,101 -8000005c: 06600693 li a3,102 -80000060: 00d52023 sw a3,0(a0) # 10000000 -80000064: 18b5262f sc.w a2,a1,(a0) -80000068: 00100713 li a4,1 -8000006c: 26e61e63 bne a2,a4,800002e8 -80000070: 00052703 lw a4,0(a0) -80000074: 26e69a63 bne a3,a4,800002e8 -80000078: 00200e13 li t3,2 -8000007c: 10000537 lui a0,0x10000 -80000080: 00450513 addi a0,a0,4 # 10000004 -80000084: 06700593 li a1,103 -80000088: 06800613 li a2,104 -8000008c: 06900693 li a3,105 -80000090: 00d52023 sw a3,0(a0) -80000094: 18b5262f sc.w a2,a1,(a0) -80000098: 00100713 li a4,1 -8000009c: 24e61663 bne a2,a4,800002e8 -800000a0: 00052703 lw a4,0(a0) -800000a4: 24e69263 bne a3,a4,800002e8 -800000a8: 00300e13 li t3,3 -800000ac: 10000537 lui a0,0x10000 -800000b0: 00450513 addi a0,a0,4 # 10000004 -800000b4: 06700593 li a1,103 -800000b8: 06800613 li a2,104 -800000bc: 06900693 li a3,105 -800000c0: 18b5262f sc.w a2,a1,(a0) -800000c4: 00100713 li a4,1 -800000c8: 22e61063 bne 
a2,a4,800002e8 +8000004c : +8000004c: 200002b7 lui t0,0x20000 +80000050: 00001337 lui t1,0x1 +80000054: 02000393 li t2,32 + +80000058 : +80000058: 0002ae03 lw t3,0(t0) # 20000000 +8000005c: 006282b3 add t0,t0,t1 +80000060: fff38393 addi t2,t2,-1 +80000064: fe039ae3 bnez t2,80000058 +80000068: 00008067 ret + +8000006c <_start>: +8000006c: 00100e13 li t3,1 +80000070: 10000537 lui a0,0x10000 +80000074: 06400593 li a1,100 +80000078: 06500613 li a2,101 +8000007c: 06600693 li a3,102 +80000080: 00d52023 sw a3,0(a0) # 10000000 +80000084: 18b5262f sc.w a2,a1,(a0) +80000088: 00100713 li a4,1 +8000008c: 18e61863 bne a2,a4,8000021c +80000090: 00052703 lw a4,0(a0) +80000094: 18e69463 bne a3,a4,8000021c + +80000098 : +80000098: 00200e13 li t3,2 +8000009c: 10000537 lui a0,0x10000 +800000a0: 00450513 addi a0,a0,4 # 10000004 +800000a4: 06700593 li a1,103 +800000a8: 06800613 li a2,104 +800000ac: 06900693 li a3,105 +800000b0: 00d52023 sw a3,0(a0) +800000b4: 18b5262f sc.w a2,a1,(a0) +800000b8: 00100713 li a4,1 +800000bc: 16e61063 bne a2,a4,8000021c +800000c0: 00052703 lw a4,0(a0) +800000c4: 14e69c63 bne a3,a4,8000021c +800000c8: f85ff0ef jal ra,8000004c 800000cc: 00052703 lw a4,0(a0) -800000d0: 20e69c63 bne a3,a4,800002e8 -800000d4: 00400e13 li t3,4 +800000d0: 14e69663 bne a3,a4,8000021c + +800000d4 : +800000d4: 00300e13 li t3,3 800000d8: 10000537 lui a0,0x10000 -800000dc: 00850513 addi a0,a0,8 # 10000008 -800000e0: 06a00593 li a1,106 -800000e4: 06b00613 li a2,107 -800000e8: 06c00693 li a3,108 -800000ec: 00d52023 sw a3,0(a0) -800000f0: 100527af lr.w a5,(a0) -800000f4: 18b5262f sc.w a2,a1,(a0) -800000f8: 1ed79863 bne a5,a3,800002e8 -800000fc: 1e061663 bnez a2,800002e8 -80000100: 00052703 lw a4,0(a0) -80000104: 1ee59263 bne a1,a4,800002e8 -80000108: 00500e13 li t3,5 -8000010c: 10000537 lui a0,0x10000 -80000110: 00850513 addi a0,a0,8 # 10000008 -80000114: 06d00593 li a1,109 -80000118: 06e00613 li a2,110 -8000011c: 06f00693 li a3,111 -80000120: 00d52023 sw a3,0(a0) -80000124: 18b5262f 
sc.w a2,a1,(a0) -80000128: 1c061063 bnez a2,800002e8 -8000012c: 00052703 lw a4,0(a0) -80000130: 1ae59c63 bne a1,a4,800002e8 -80000134: 00600e13 li t3,6 -80000138: 10000537 lui a0,0x10000 -8000013c: 00c50513 addi a0,a0,12 # 1000000c -80000140: 07000593 li a1,112 -80000144: 07100613 li a2,113 -80000148: 07200693 li a3,114 -8000014c: 10000437 lui s0,0x10000 -80000150: 01040413 addi s0,s0,16 # 10000010 -80000154: 07300493 li s1,115 -80000158: 07400913 li s2,116 -8000015c: 07500993 li s3,117 -80000160: 00d52023 sw a3,0(a0) -80000164: 01342023 sw s3,0(s0) -80000168: 100527af lr.w a5,(a0) -8000016c: 10042aaf lr.w s5,(s0) -80000170: 18b5262f sc.w a2,a1,(a0) -80000174: 1894292f sc.w s2,s1,(s0) -80000178: 16d79863 bne a5,a3,800002e8 -8000017c: 16061663 bnez a2,800002e8 -80000180: 00052703 lw a4,0(a0) -80000184: 16e59263 bne a1,a4,800002e8 -80000188: 173a9063 bne s5,s3,800002e8 -8000018c: 14091e63 bnez s2,800002e8 -80000190: 00042a03 lw s4,0(s0) -80000194: 15449a63 bne s1,s4,800002e8 -80000198: 00700e13 li t3,7 -8000019c: 10000537 lui a0,0x10000 -800001a0: 01450513 addi a0,a0,20 # 10000014 -800001a4: 07800593 li a1,120 -800001a8: 07900613 li a2,121 -800001ac: 07a00693 li a3,122 -800001b0: 01000e93 li t4,16 +800000dc: 00450513 addi a0,a0,4 # 10000004 +800000e0: 06700593 li a1,103 +800000e4: 06800613 li a2,104 +800000e8: 06900693 li a3,105 +800000ec: 18b5262f sc.w a2,a1,(a0) +800000f0: 00100713 li a4,1 +800000f4: 12e61463 bne a2,a4,8000021c +800000f8: 00052703 lw a4,0(a0) +800000fc: 12e69063 bne a3,a4,8000021c +80000100: f4dff0ef jal ra,8000004c +80000104: 00052703 lw a4,0(a0) +80000108: 10e69a63 bne a3,a4,8000021c -800001b4 : -800001b4: 00d52023 sw a3,0(a0) -800001b8: 100527af lr.w a5,(a0) -800001bc: 18b5262f sc.w a2,a1,(a0) -800001c0: 12d79463 bne a5,a3,800002e8 -800001c4: 12061263 bnez a2,800002e8 -800001c8: 00052703 lw a4,0(a0) -800001cc: 10e59e63 bne a1,a4,800002e8 -800001d0: fffe8e93 addi t4,t4,-1 -800001d4: 00450513 addi a0,a0,4 -800001d8: 00358593 addi a1,a1,3 
-800001dc: 00360613 addi a2,a2,3 -800001e0: 00368693 addi a3,a3,3 -800001e4: fc0e98e3 bnez t4,800001b4 -800001e8: 00900e13 li t3,9 -800001ec: 10000537 lui a0,0x10000 -800001f0: 10050513 addi a0,a0,256 # 10000100 -800001f4: 07b00593 li a1,123 -800001f8: 07c00613 li a2,124 -800001fc: 07d00693 li a3,125 -80000200: 00d52023 sw a3,0(a0) -80000204: 100527af lr.w a5,(a0) -80000208: 00000073 ecall -8000020c: 18b5262f sc.w a2,a1,(a0) -80000210: 00100713 li a4,1 -80000214: 0ce61a63 bne a2,a4,800002e8 -80000218: 00052703 lw a4,0(a0) -8000021c: 0ce69663 bne a3,a4,800002e8 -80000220: 00b00e13 li t3,11 -80000224: 10000537 lui a0,0x10000 -80000228: 30050513 addi a0,a0,768 # 10000300 -8000022c: 08200593 li a1,130 -80000230: 08300613 li a2,131 -80000234: 08400693 li a3,132 -80000238: 00d52023 sw a3,0(a0) -8000023c: 00001eb7 lui t4,0x1 -80000240: 800e8e93 addi t4,t4,-2048 # 800 -80000244: 304e9073 csrw mie,t4 -80000248: 00800e93 li t4,8 -8000024c: 100527af lr.w a5,(a0) -80000250: 300e9073 csrw mstatus,t4 -80000254: 00000013 nop -80000258: 00000013 nop -8000025c: 00000013 nop -80000260: 00000013 nop -80000264: 00000013 nop -80000268: 00000013 nop -8000026c: 18b5262f sc.w a2,a1,(a0) -80000270: 00100713 li a4,1 -80000274: 06e61a63 bne a2,a4,800002e8 -80000278: 00052703 lw a4,0(a0) -8000027c: 06e69663 bne a3,a4,800002e8 -80000280: 00c00e13 li t3,12 -80000284: 10000537 lui a0,0x10000 -80000288: 40050513 addi a0,a0,1024 # 10000400 -8000028c: 08c00593 li a1,140 -80000290: 08d00613 li a2,141 -80000294: 08e00693 li a3,142 -80000298: 00d52023 sw a3,0(a0) -8000029c: 00001eb7 lui t4,0x1 -800002a0: 800e8e93 addi t4,t4,-2048 # 800 -800002a4: 304e9073 csrw mie,t4 -800002a8: 00002eb7 lui t4,0x2 -800002ac: 808e8e93 addi t4,t4,-2040 # 1808 -800002b0: 100527af lr.w a5,(a0) -800002b4: 300e9073 csrw mstatus,t4 -800002b8: 00000013 nop -800002bc: 00000013 nop -800002c0: 00000013 nop -800002c4: 00000013 nop -800002c8: 00000013 nop -800002cc: 00000013 nop -800002d0: 18b5262f sc.w a2,a1,(a0) -800002d4: 
00100713 li a4,1 -800002d8: 00e61863 bne a2,a4,800002e8 -800002dc: 00052703 lw a4,0(a0) -800002e0: 00e69463 bne a3,a4,800002e8 -800002e4: 0100006f j 800002f4 +8000010c : +8000010c: 00400e13 li t3,4 +80000110: 10000537 lui a0,0x10000 +80000114: 00850513 addi a0,a0,8 # 10000008 +80000118: 06a00593 li a1,106 +8000011c: 06b00613 li a2,107 +80000120: 06c00693 li a3,108 +80000124: 00d52023 sw a3,0(a0) +80000128: 100527af lr.w a5,(a0) +8000012c: 18b5262f sc.w a2,a1,(a0) +80000130: 0ed79663 bne a5,a3,8000021c +80000134: 0e061463 bnez a2,8000021c +80000138: 00052703 lw a4,0(a0) +8000013c: 0ee59063 bne a1,a4,8000021c +80000140: f0dff0ef jal ra,8000004c +80000144: 00052703 lw a4,0(a0) +80000148: 0ce59a63 bne a1,a4,8000021c -800002e8 : -800002e8: f0100137 lui sp,0xf0100 -800002ec: f2410113 addi sp,sp,-220 # f00fff24 -800002f0: 01c12023 sw t3,0(sp) +8000014c : +8000014c: 00500e13 li t3,5 +80000150: 10000537 lui a0,0x10000 +80000154: 00850513 addi a0,a0,8 # 10000008 +80000158: 06d00593 li a1,109 +8000015c: 06e00613 li a2,110 +80000160: 06f00693 li a3,111 +80000164: 00d52023 sw a3,0(a0) +80000168: 18b5262f sc.w a2,a1,(a0) +8000016c: 0a060863 beqz a2,8000021c +80000170: 00052703 lw a4,0(a0) +80000174: 0ae69463 bne a3,a4,8000021c +80000178: ed5ff0ef jal ra,8000004c +8000017c: 00052703 lw a4,0(a0) +80000180: 08e69e63 bne a3,a4,8000021c +80000184: 00700e13 li t3,7 +80000188: 10000537 lui a0,0x10000 +8000018c: 01450513 addi a0,a0,20 # 10000014 +80000190: 07800593 li a1,120 +80000194: 07900613 li a2,121 +80000198: 07a00693 li a3,122 +8000019c: 01000e93 li t4,16 -800002f4 : -800002f4: f0100137 lui sp,0xf0100 -800002f8: f2010113 addi sp,sp,-224 # f00fff20 -800002fc: 00012023 sw zero,0(sp) -80000300: 00000013 nop -80000304: 00000013 nop -80000308: 00000013 nop -8000030c: 00000013 nop -80000310: 00000013 nop -80000314: 00000013 nop +800001a0 : +800001a0: 00d52023 sw a3,0(a0) +800001a4: 100527af lr.w a5,(a0) +800001a8: 18b5262f sc.w a2,a1,(a0) +800001ac: 06d79863 bne a5,a3,8000021c 
+800001b0: 06061663 bnez a2,8000021c +800001b4: 00052703 lw a4,0(a0) +800001b8: 06e59263 bne a1,a4,8000021c +800001bc: fffe8e93 addi t4,t4,-1 +800001c0: 00450513 addi a0,a0,4 +800001c4: 00358593 addi a1,a1,3 +800001c8: 00360613 addi a2,a2,3 +800001cc: 00368693 addi a3,a3,3 +800001d0: fc0e98e3 bnez t4,800001a0 + +800001d4 : +800001d4: 00900e13 li t3,9 +800001d8: 10000537 lui a0,0x10000 +800001dc: 10050513 addi a0,a0,256 # 10000100 +800001e0: 07b00593 li a1,123 +800001e4: 07c00613 li a2,124 +800001e8: 07d00693 li a3,125 +800001ec: 00d52023 sw a3,0(a0) +800001f0: 100527af lr.w a5,(a0) +800001f4: 00000073 ecall +800001f8: 18b527af sc.w a5,a1,(a0) +800001fc: 00000713 li a4,0 +80000200: 00e79e63 bne a5,a4,8000021c +80000204: 00052703 lw a4,0(a0) +80000208: 00e59a63 bne a1,a4,8000021c +8000020c: e41ff0ef jal ra,8000004c +80000210: 00052703 lw a4,0(a0) +80000214: 00e59463 bne a1,a4,8000021c +80000218: 0100006f j 80000228 + +8000021c : +8000021c: f0100137 lui sp,0xf0100 +80000220: f2410113 addi sp,sp,-220 # f00fff24 +80000224: 01c12023 sw t3,0(sp) + +80000228 : +80000228: f0100137 lui sp,0xf0100 +8000022c: f2010113 addi sp,sp,-224 # f00fff20 +80000230: 00012023 sw zero,0(sp) +80000234: 00000013 nop +80000238: 00000013 nop +8000023c: 00000013 nop +80000240: 00000013 nop +80000244: 00000013 nop +80000248: 00000013 nop diff --git a/src/test/cpp/raw/lrsc/build/lrsc.hex b/src/test/cpp/raw/lrsc/build/lrsc.hex index 7b96205..b0ee273 100644 --- a/src/test/cpp/raw/lrsc/build/lrsc.hex +++ b/src/test/cpp/raw/lrsc/build/lrsc.hex @@ -1,53 +1,40 @@ :0200000480007A -:100000006F00C00413000000130000001300000084 +:100000006F00C00613000000130000001300000082 :100010001300000013000000130000001300000094 :10002000F32E003093FE0E08638A0E00B72E0000F8 :10003000938E0E8073900E3073002030F32E1034A8 -:10004000938E4E0073901E3473002030130E1000F8 -:100050003705001093054006130650069306600608 -:100060002320D5002F26B51813071000631EE6269F -:1000700003270500639AE626130E200037050010BB 
-:100080001305450093057006130680069306900637 -:100090002320D5002F26B518130710006316E62479 -:1000A000032705006392E624130E30003705001085 -:1000B0001305450093057006130680069306900607 -:1000C0002F26B518130710006310E622032705003A -:1000D000639CE620130E40003705001013058500D1 -:1000E0009305A0061306B0069306C0062320D5008C -:1000F000AF2705102F26B5186398D71E6316061E66 -:10010000032705006392E51E130E5000370500100B -:10011000130585009305D0061306E0069306F00646 -:100120002320D5002F26B5186310061C03270500D1 -:10013000639CE51A130E6000370500101305C50017 -:1001400093050007130610079306200737040010D5 -:10015000130404019304300713094007930950075F -:100160002320D50023203401AF270510AF2A041027 -:100170002F26B5182F2994186398D71663160616DC -:10018000032705006392E51663903A17631E09146E -:10019000032A0400639A4415130E700037050010FB -:1001A0001305450193058007130690079306A007E2 -:1001B000930E00012320D500AF2705102F26B51878 -:1001C0006394D7126312061203270500639EE5109D -:1001D000938EFEFF13054500938535001306360008 -:1001E00093863600E3980EFC130E9000370500103E -:1001F000130505109305B0071306C0079306D00733 -:100200002320D500AF270510730000002F26B51856 -:1002100013071000631AE60C032705006396E60C2B -:10022000130EB000370500101305053093052008A4 -:1002300013063008930640082320D500B71E00009F -:10024000938E0E8073904E30930E8000AF27051072 -:1002500073900E3013000000130000001300000024 -:100260001300000013000000130000002F26B51833 -:1002700013071000631AE606032705006396E606D7 -:10028000130EC00037050010130505409305C00884 -:100290001306D0089306E0082320D500B71E0000FF -:1002A000938E0E8073904E30B72E0000938E8E800A -:1002B000AF27051073900E301300000013000000EC -:1002C00013000000130000001300000013000000E2 -:1002D0002F26B518130710006318E6000327050042 -:1002E0006394E6006F000001370110F0130141F242 -:1002F0002320C101370110F0130101F22320010076 -:1003000013000000130000001300000013000000A1 -:080310001300000013000000BF -:040000058000004C2B +:10004000938E4E0073901E3473002030B702002050 +:10005000371300009303000203AE0200B382620074 
+:100060009383F3FFE39A03FE67800000130E1000F2 +:1000700037050010930540061306500693066006E8 +:100080002320D5002F26B518130710006318E61893 +:10009000032705006394E618130E200037050010AF +:1000A0001305450093057006130680069306900617 +:1000B0002320D5002F26B518130710006310E6166D +:1000C00003270500639CE614EFF05FF803270500A3 +:1000D0006396E614130E3000370500101305450033 +:1000E0009305700613068006930690062F26B51812 +:1000F000130710006314E612032705006390E6124D +:10010000EFF0DFF403270500639AE610130E4000BA +:1001100037050010130585009305A0061306B006E9 +:100120009306C0062320D500AF2705102F26B5184B +:100130006396D70E6314060E032705006390E50E41 +:10014000EFF0DFF003270500639AE50C130E500073 +:1001500037050010130585009305D0061306E00649 +:100160009306F0062320D5002F26B5186308060A4B +:10017000032705006394E60AEFF05FED032705000F +:10018000639EE608130E7000370500101305450145 +:1001900093058007130690079306A007930E0001AE +:1001A0002320D500AF2705102F26B5186398D70652 +:1001B00063160606032705006392E506938EFEFF8D +:1001C00013054500938535001306360093863600E7 +:1001D000E3980EFC130E9000370500101305051070 +:1001E0009305B0071306C0079306D0072320D50058 +:1001F000AF27051073000000AF27B51813070000E4 +:10020000639EE70003270500639AE500EFF01FE413 +:10021000032705006394E5006F000001370110F02B +:10022000130141F22320C101370110F0130101F243 +:100230002320010013000000130000001300000041 +:0C02400013000000130000001300000079 +:040000058000006C0B :00000001FF diff --git a/src/test/cpp/raw/lrsc/src/crt.S b/src/test/cpp/raw/lrsc/src/crt.S index 7fef5e3..a19663f 100644 --- a/src/test/cpp/raw/lrsc/src/crt.S +++ b/src/test/cpp/raw/lrsc/src/crt.S @@ -25,8 +25,19 @@ notExternalInterrupt: csrw mepc, x29 mret +flush: + li t0, 0x20000000 + li t1, 0x1000 + li t2, 32 +flushLoop: + lw t3, 0(t0) + add t0, t0, t1 + addi t2,t2,-1 + bnez t2, flushLoop + ret + _start: -//Test 1 SC on unreserved area should fail and not write memory +test1: //Test 1 SC on unreserved area should fail and not write memory li x28, 1 li a0, 0x10000000 li a1, 100 @@ 
-39,7 +50,7 @@ _start: lw a4, 0(a0) bne a3, a4, fail -//Test 2 SC on another unreserved area should fail and not write memory +test2: //Test 2 SC on another unreserved area should fail and not write memory li x28, 2 li a0, 0x10000004 li a1, 103 @@ -51,9 +62,12 @@ _start: bne a2, a4, fail lw a4, 0(a0) bne a3, a4, fail + call flush + lw a4, 0(a0) + bne a3, a4, fail -//Test 3 retrying SC on unreserved area should fail and not write memory +test3: //Test 3 retrying SC on unreserved area should fail and not write memory li x28, 3 li a0, 0x10000004 li a1, 103 @@ -64,9 +78,12 @@ _start: bne a2, a4, fail lw a4, 0(a0) bne a3, a4, fail + call flush + lw a4, 0(a0) + bne a3, a4, fail -//Test 4 SC on reserved area should pass and should be written write memory +test4: //Test 4 SC on reserved area should pass and should be written write memory li x28, 4 li a0, 0x10000008 li a1, 106 @@ -79,9 +96,12 @@ _start: bne a2, x0, fail lw a4, 0(a0) bne a1, a4, fail + call flush + lw a4, 0(a0) + bne a1, a4, fail -//Test 5 redo SC on reserved area should pass and should be written write memory +test5: //Test 5 redo SC on reserved area should fail li x28, 5 li a0, 0x10000008 li a1, 109 @@ -89,36 +109,13 @@ _start: li a3, 111 sw a3, 0(a0) sc.w a2, a1, (a0) - bne a2, x0, fail + beq a2, x0, fail lw a4, 0(a0) - bne a1, a4, fail - -//Test 6 Allow two entries at the same time - li x28, 6 - li a0, 0x1000000C - li a1, 112 - li a2, 113 - li a3, 114 - li s0, 0x10000010 - li s1, 115 - li s2, 116 - li s3, 117 - - sw a3, 0(a0) - sw s3, 0(s0) - lr.w a5, (a0) - lr.w s5, (s0) - sc.w a2, a1, (a0) - sc.w s2, s1, (s0) - bne a5, a3, fail - bne a2, x0, fail + bne a3, a4, fail + call flush lw a4, 0(a0) - bne a1, a4, fail + bne a3, a4, fail - bne s5, s3, fail - bne s2, x0, fail - lw s4, 0(s0) - bne s1, s4, fail //Test 7 do a lot of allocation to clear the entries li x28, 7 @@ -157,7 +154,7 @@ test7: bne a5, a4, fail*/ -//Test 9 SC should fail after a context switching +test9: //Test 9 SC should pass after a context 
switching li x28, 9 li a0, 0x10000100 li a1, 123 @@ -166,11 +163,14 @@ test7: sw a3, 0(a0) lr.w a5, (a0) scall - sc.w a2, a1, (a0) - li a4, 1 - bne a2, a4, fail + sc.w a5, a1, (a0) + li a4, 0 + bne a5, a4, fail lw a4, 0(a0) - bne a3, a4, fail + bne a1, a4, fail + call flush + lw a4, 0(a0) + bne a1, a4, fail @@ -192,58 +192,6 @@ test7: bne a7, a4, fail*/ - -//Test 11 SC should fail after a external interrupt context switching - li x28, 11 - li a0, 0x10000300 - li a1, 130 - li a2, 131 - li a3, 132 - sw a3, 0(a0) - li x29, 0x800 //800 external interrupts - csrw mie,x29 - li x29, 0x008 //008 enable interrupts - lr.w a5, (a0) - csrw mstatus,x29 //Enable external interrupt (will jump instantly due to testbench setup) - nop - nop - nop - nop - nop - nop - sc.w a2, a1, (a0) - li a4, 1 - bne a2, a4, fail - lw a4, 0(a0) - bne a3, a4, fail - - -//Test 12 SC should fail after a external interrupt context switching (callback on lr) - li x28, 12 - li a0, 0x10000400 - li a1, 140 - li a2, 141 - li a3, 142 - sw a3, 0(a0) - li x29, 0x800 //800 external interrupts - csrw mie,x29 - li x29, 0x1808 //008 enable interrupts - lr.w a5, (a0) - csrw mstatus,x29 //Enable external interrupt (will jump instantly due to testbench setup) - nop - nop - nop - nop - nop - nop - sc.w a2, a1, (a0) - li a4, 1 - bne a2, a4, fail - lw a4, 0(a0) - bne a3, a4, fail - - - j pass diff --git a/src/test/cpp/raw/machineCsr/build/machineCsr.asm b/src/test/cpp/raw/machineCsr/build/machineCsr.asm index 4d80d75..679be70 100644 --- a/src/test/cpp/raw/machineCsr/build/machineCsr.asm +++ b/src/test/cpp/raw/machineCsr/build/machineCsr.asm @@ -29,26 +29,26 @@ Disassembly of section .crt_section: 80000044: 01de7f33 and t5,t3,t4 80000048: 000f1863 bnez t5,80000058 8000004c: 34102ef3 csrr t4,mepc -80000050: 004e8e93 addi t4,t4,4 # 80000004 +80000050: 004e8e93 addi t4,t4,4 # 80000004 80000054: 341e9073 csrw mepc,t4 80000058 : 80000058: 80000eb7 lui t4,0x80000 -8000005c: 003e8e93 addi t4,t4,3 # 80000003 +8000005c: 003e8e93 
addi t4,t4,3 # 80000003 80000060: 01ce9863 bne t4,t3,80000070 80000064: f0013c37 lui s8,0xf0013 80000068: 00000c93 li s9,0 -8000006c: 019c2023 sw s9,0(s8) # f0013000 +8000006c: 019c2023 sw s9,0(s8) # f0013000 80000070 : 80000070: 80000eb7 lui t4,0x80000 -80000074: 007e8e93 addi t4,t4,7 # 80000007 +80000074: 007e8e93 addi t4,t4,7 # 80000007 80000078: 01ce9463 bne t4,t3,80000080 8000007c: 30405073 csrwi mie,0 80000080 : 80000080: 80000eb7 lui t4,0x80000 -80000084: 00be8e93 addi t4,t4,11 # 8000000b +80000084: 00be8e93 addi t4,t4,11 # 8000000b 80000088: 01ce9463 bne t4,t3,80000090 8000008c: 30405073 csrwi mie,0 @@ -65,7 +65,7 @@ Disassembly of section .crt_section: 800000ac: 30429073 csrw mie,t0 800000b0: f0013c37 lui s8,0xf0013 800000b4: 00100c93 li s9,1 -800000b8: 019c2023 sw s9,0(s8) # f0013000 +800000b8: 019c2023 sw s9,0(s8) # f0013000 800000bc: 00000013 nop 800000c0: 00000013 nop 800000c4: 00000013 nop @@ -101,41 +101,55 @@ Disassembly of section .crt_section: 8000013c: 00000013 nop 80000140: 00500e13 li t3,5 80000144: f01001b7 lui gp,0xf0100 -80000148: f4018193 addi gp,gp,-192 # f00fff40 +80000148: f4018193 addi gp,gp,-192 # f00fff40 8000014c: 0001a203 lw tp,0(gp) 80000150: 0041a283 lw t0,4(gp) 80000154: 3ff20213 addi tp,tp,1023 # 3ff 80000158: 0041a423 sw tp,8(gp) 8000015c: 0051a623 sw t0,12(gp) -80000160: 00600e13 li t3,6 -80000164: 08000213 li tp,128 -80000168: 30421073 csrw mie,tp -8000016c: 00700e13 li t3,7 -80000170: 10500073 wfi -80000174: 00800e13 li t3,8 -80000178: 00100193 li gp,1 -8000017c: 0041a023 sw tp,0(gp) -80000180: 00900e13 li t3,9 -80000184: 00419023 sh tp,0(gp) -80000188: 00a00e13 li t3,10 -8000018c: 0001a203 lw tp,0(gp) -80000190: 00b00e13 li t3,11 -80000194: 00019203 lh tp,0(gp) -80000198: 00c00e13 li t3,12 -8000019c: 00d00e13 li t3,13 -800001a0: 00002083 lw ra,0(zero) # 0 +80000160: 00000013 nop +80000164: 00000013 nop +80000168: 00000013 nop +8000016c: 00000013 nop +80000170: 00000013 nop +80000174: 00000013 nop +80000178: 00000013 nop 
+8000017c: 00000013 nop +80000180: 00000013 nop +80000184: 00000013 nop +80000188: 00000013 nop +8000018c: 00000013 nop +80000190: 00000013 nop +80000194: 00000013 nop +80000198: 00600e13 li t3,6 +8000019c: 08000213 li tp,128 +800001a0: 30421073 csrw mie,tp +800001a4: 00700e13 li t3,7 +800001a8: 10500073 wfi +800001ac: 00800e13 li t3,8 +800001b0: 00100193 li gp,1 +800001b4: 0041a023 sw tp,0(gp) +800001b8: 00900e13 li t3,9 +800001bc: 00419023 sh tp,0(gp) +800001c0: 00a00e13 li t3,10 +800001c4: 0001a203 lw tp,0(gp) +800001c8: 00b00e13 li t3,11 +800001cc: 00019203 lh tp,0(gp) +800001d0: 00c00e13 li t3,12 +800001d4: 00d00e13 li t3,13 +800001d8: 00002083 lw ra,0(zero) # 0 -800001a4 : -800001a4: 0020006f j 800001a6 -800001a8: 00002083 lw ra,0(zero) # 0 -800001ac: 00e00e13 li t3,14 -800001b0: 20200073 hret -800001b4: 00f00e13 li t3,15 -800001b8: f01000b7 lui ra,0xf0100 -800001bc: f6008093 addi ra,ra,-160 # f00fff60 -800001c0: 0000a103 lw sp,0(ra) -800001c4: 01000e13 li t3,16 -800001c8: 0020a023 sw sp,0(ra) -800001cc: 01100e13 li t3,17 -800001d0: 00008067 ret +800001dc : +800001dc: 0020006f j 800001de +800001e0: 00002083 lw ra,0(zero) # 0 +800001e4: 00e00e13 li t3,14 +800001e8: 20200073 hret +800001ec: 00f00e13 li t3,15 +800001f0: f01000b7 lui ra,0xf0100 +800001f4: f6008093 addi ra,ra,-160 # f00fff60 +800001f8: 0000a103 lw sp,0(ra) +800001fc: 01000e13 li t3,16 +80000200: 0020a023 sw sp,0(ra) +80000204: 01100e13 li t3,17 +80000208: 00008067 ret ... 
diff --git a/src/test/cpp/raw/machineCsr/build/machineCsr.hex b/src/test/cpp/raw/machineCsr/build/machineCsr.hex index d104c88..d6c33e7 100644 --- a/src/test/cpp/raw/machineCsr/build/machineCsr.hex +++ b/src/test/cpp/raw/machineCsr/build/machineCsr.hex @@ -21,13 +21,17 @@ :100130001300000013000000130000001300000073 :10014000130E5000B70110F0938101F403A20100D7 :1001500083A241001302F23F23A4410023A65100D1 -:10016000130E60001302000873104230130E70006B -:1001700073005010130E80009301100023A0410063 -:10018000130E900023904100130EA00003A2010063 -:10019000130EB00003920100130EC000130ED00026 -:1001A000832000006F00200083200000130EE00079 -:1001B00073002020130EF000B70010F0938000F6BB -:1001C00003A10000130E000123A02000130E100154 -:1001D0006780000000000000000000000000000038 +:100160001300000013000000130000001300000043 +:100170001300000013000000130000001300000033 +:100180001300000013000000130000001300000023 +:100190001300000013000000130E6000130200089B +:1001A00073104230130E700073005010130E800055 +:1001B0009301100023A04100130E900023904100F2 +:1001C000130EA00003A20100130EB0000392010061 +:1001D000130EC000130ED000832000006F0020001B +:1001E00083200000130EE00073002020130EF000A7 +:1001F000B70010F0938000F603A10000130E000179 +:1002000023A02000130E10016780000000000000F2 +:1002100000000000000000000000000000000000DE :0400000580000094E3 :00000001FF diff --git a/src/test/cpp/raw/machineCsr/build/machineCsrCompressed.asm b/src/test/cpp/raw/machineCsr/build/machineCsrCompressed.asm index df9e96f..097f4e3 100644 --- a/src/test/cpp/raw/machineCsr/build/machineCsrCompressed.asm +++ b/src/test/cpp/raw/machineCsr/build/machineCsrCompressed.asm @@ -5,8 +5,7 @@ build/machineCsrCompressed.elf: file format elf32-littleriscv Disassembly of section .crt_section: 80000000 : -80000000: a071 j 8000008c <_start> -80000002: 0001 nop +80000000: 0940006f j 80000094 <_start> 80000004: 00000013 nop 80000008: 00000013 nop 8000000c: 00000013 nop @@ -17,123 +16,140 @@ Disassembly of section .crt_section: 80000020 : 
80000020: 34202e73 csrr t3,mcause -80000024: 000e1c63 bnez t3,8000003c +80000024: 000e1e63 bnez t3,80000040 80000028: ffc00f13 li t5,-4 8000002c: 34102ef3 csrr t4,mepc 80000030: 01eefeb3 and t4,t4,t5 -80000034: 0e91 addi t4,t4,4 -80000036: 341e9073 csrw mepc,t4 -8000003a: a821 j 80000052 +80000034: 004e8e93 addi t4,t4,4 +80000038: 341e9073 csrw mepc,t4 +8000003c: 01c0006f j 80000058 -8000003c : -8000003c: 80000eb7 lui t4,0x80000 -80000040: 01de7f33 and t5,t3,t4 -80000044: 000f1763 bnez t5,80000052 -80000048: 34102ef3 csrr t4,mepc -8000004c: 0e91 addi t4,t4,4 -8000004e: 341e9073 csrw mepc,t4 +80000040 : +80000040: 80000eb7 lui t4,0x80000 +80000044: 01de7f33 and t5,t3,t4 +80000048: 000f1863 bnez t5,80000058 +8000004c: 34102ef3 csrr t4,mepc +80000050: 004e8e93 addi t4,t4,4 # 80000004 +80000054: 341e9073 csrw mepc,t4 -80000052 : -80000052: 80000eb7 lui t4,0x80000 -80000056: 003e8e93 addi t4,t4,3 # 80000003 <_start+0xffffff77> -8000005a: 01ce9763 bne t4,t3,80000068 -8000005e: f0013c37 lui s8,0xf0013 -80000062: 4c81 li s9,0 -80000064: 019c2023 sw s9,0(s8) # f0013000 <_start+0x70012f74> +80000058 : +80000058: 80000eb7 lui t4,0x80000 +8000005c: 003e8e93 addi t4,t4,3 # 80000003 +80000060: 01ce9863 bne t4,t3,80000070 +80000064: f0013c37 lui s8,0xf0013 +80000068: 00000c93 li s9,0 +8000006c: 019c2023 sw s9,0(s8) # f0013000 -80000068 : -80000068: 80000eb7 lui t4,0x80000 -8000006c: 007e8e93 addi t4,t4,7 # 80000007 <_start+0xffffff7b> -80000070: 01ce9463 bne t4,t3,80000078 -80000074: 30405073 csrwi mie,0 +80000070 : +80000070: 80000eb7 lui t4,0x80000 +80000074: 007e8e93 addi t4,t4,7 # 80000007 +80000078: 01ce9463 bne t4,t3,80000080 +8000007c: 30405073 csrwi mie,0 -80000078 : -80000078: 80000eb7 lui t4,0x80000 -8000007c: 00be8e93 addi t4,t4,11 # 8000000b <_start+0xffffff7f> -80000080: 01ce9463 bne t4,t3,80000088 -80000084: 30405073 csrwi mie,0 +80000080 : +80000080: 80000eb7 lui t4,0x80000 +80000084: 00be8e93 addi t4,t4,11 # 8000000b +80000088: 01ce9463 bne t4,t3,80000090 
+8000008c: 30405073 csrwi mie,0 -80000088 : -80000088: 30200073 mret +80000090 : +80000090: 30200073 mret -8000008c <_start>: -8000008c: 4e05 li t3,1 -8000008e: 00000073 ecall -80000092: 4e09 li t3,2 -80000094: 42a1 li t0,8 -80000096: 3002a073 csrs mstatus,t0 -8000009a: 42a1 li t0,8 -8000009c: 30429073 csrw mie,t0 -800000a0: f0013c37 lui s8,0xf0013 -800000a4: 4c85 li s9,1 -800000a6: 019c2023 sw s9,0(s8) # f0013000 <_start+0x70012f74> -800000aa: 0001 nop -800000ac: 0001 nop -800000ae: 0001 nop -800000b0: 0001 nop -800000b2: 0001 nop -800000b4: 0001 nop -800000b6: 0001 nop -800000b8: 0001 nop -800000ba: 0001 nop -800000bc: 0001 nop -800000be: 0001 nop -800000c0: 0001 nop -800000c2: 4e0d li t3,3 -800000c4: 08000293 li t0,128 -800000c8: 30429073 csrw mie,t0 -800000cc: 0001 nop -800000ce: 0001 nop -800000d0: 0001 nop -800000d2: 0001 nop -800000d4: 0001 nop -800000d6: 0001 nop -800000d8: 0001 nop -800000da: 4e11 li t3,4 -800000dc: 000012b7 lui t0,0x1 -800000e0: 80028293 addi t0,t0,-2048 # 800 -800000e4: 30429073 csrw mie,t0 -800000e8: 0001 nop -800000ea: 0001 nop -800000ec: 0001 nop -800000ee: 0001 nop -800000f0: 0001 nop -800000f2: 0001 nop -800000f4: 0001 nop -800000f6: 4e15 li t3,5 -800000f8: f01001b7 lui gp,0xf0100 -800000fc: f4018193 addi gp,gp,-192 # f00fff40 <_start+0x700ffeb4> -80000100: 0001a203 lw tp,0(gp) -80000104: 0041a283 lw t0,4(gp) -80000108: 3ff20213 addi tp,tp,1023 # 3ff -8000010c: 0041a423 sw tp,8(gp) -80000110: 0051a623 sw t0,12(gp) -80000114: 4e19 li t3,6 -80000116: 08000213 li tp,128 -8000011a: 30421073 csrw mie,tp -8000011e: 4e1d li t3,7 -80000120: 10500073 wfi -80000124: 4e21 li t3,8 -80000126: 4185 li gp,1 -80000128: 0041a023 sw tp,0(gp) -8000012c: 4e25 li t3,9 -8000012e: 00419023 sh tp,0(gp) -80000132: 4e29 li t3,10 -80000134: 0001a203 lw tp,0(gp) -80000138: 4e2d li t3,11 -8000013a: 00019203 lh tp,0(gp) -8000013e: 4e31 li t3,12 -80000140: 4e35 li t3,13 -80000142: 00002083 lw ra,0(zero) # 0 -80000146: 00002083 lw ra,0(zero) # 0 -8000014a: 4e39 li 
t3,14 -8000014c: 20200073 hret -80000150: 4e3d li t3,15 -80000152: f01000b7 lui ra,0xf0100 -80000156: f6008093 addi ra,ra,-160 # f00fff60 <_start+0x700ffed4> -8000015a: 0000a103 lw sp,0(ra) -8000015e: 4e41 li t3,16 -80000160: 0020a023 sw sp,0(ra) -80000164: 4e45 li t3,17 -80000166: 8082 ret +80000094 <_start>: +80000094: 00100e13 li t3,1 +80000098: 00000073 ecall +8000009c: 00200e13 li t3,2 +800000a0: 00800293 li t0,8 +800000a4: 3002a073 csrs mstatus,t0 +800000a8: 00800293 li t0,8 +800000ac: 30429073 csrw mie,t0 +800000b0: f0013c37 lui s8,0xf0013 +800000b4: 00100c93 li s9,1 +800000b8: 019c2023 sw s9,0(s8) # f0013000 +800000bc: 00000013 nop +800000c0: 00000013 nop +800000c4: 00000013 nop +800000c8: 00000013 nop +800000cc: 00000013 nop +800000d0: 00000013 nop +800000d4: 00000013 nop +800000d8: 00000013 nop +800000dc: 00000013 nop +800000e0: 00000013 nop +800000e4: 00000013 nop +800000e8: 00000013 nop +800000ec: 00300e13 li t3,3 +800000f0: 08000293 li t0,128 +800000f4: 30429073 csrw mie,t0 +800000f8: 00000013 nop +800000fc: 00000013 nop +80000100: 00000013 nop +80000104: 00000013 nop +80000108: 00000013 nop +8000010c: 00000013 nop +80000110: 00000013 nop +80000114: 00400e13 li t3,4 +80000118: 000012b7 lui t0,0x1 +8000011c: 80028293 addi t0,t0,-2048 # 800 +80000120: 30429073 csrw mie,t0 +80000124: 00000013 nop +80000128: 00000013 nop +8000012c: 00000013 nop +80000130: 00000013 nop +80000134: 00000013 nop +80000138: 00000013 nop +8000013c: 00000013 nop +80000140: 00500e13 li t3,5 +80000144: f01001b7 lui gp,0xf0100 +80000148: f4018193 addi gp,gp,-192 # f00fff40 +8000014c: 0001a203 lw tp,0(gp) +80000150: 0041a283 lw t0,4(gp) +80000154: 3ff20213 addi tp,tp,1023 # 3ff +80000158: 0041a423 sw tp,8(gp) +8000015c: 0051a623 sw t0,12(gp) +80000160: 00000013 nop +80000164: 00000013 nop +80000168: 00000013 nop +8000016c: 00000013 nop +80000170: 00000013 nop +80000174: 00000013 nop +80000178: 00000013 nop +8000017c: 00000013 nop +80000180: 00000013 nop +80000184: 00000013 nop 
+80000188: 00000013 nop +8000018c: 00000013 nop +80000190: 00000013 nop +80000194: 00000013 nop +80000198: 00600e13 li t3,6 +8000019c: 08000213 li tp,128 +800001a0: 30421073 csrw mie,tp +800001a4: 00700e13 li t3,7 +800001a8: 10500073 wfi +800001ac: 00800e13 li t3,8 +800001b0: 00100193 li gp,1 +800001b4: 0041a023 sw tp,0(gp) +800001b8: 00900e13 li t3,9 +800001bc: 00419023 sh tp,0(gp) +800001c0: 00a00e13 li t3,10 +800001c4: 0001a203 lw tp,0(gp) +800001c8: 00b00e13 li t3,11 +800001cc: 00019203 lh tp,0(gp) +800001d0: 00c00e13 li t3,12 +800001d4: 00d00e13 li t3,13 +800001d8: 00002083 lw ra,0(zero) # 0 + +800001dc : +800001dc: 0020006f j 800001de +800001e0: 00002083 lw ra,0(zero) # 0 +800001e4: 00e00e13 li t3,14 +800001e8: 20200073 hret +800001ec: 00f00e13 li t3,15 +800001f0: f01000b7 lui ra,0xf0100 +800001f4: f6008093 addi ra,ra,-160 # f00fff60 +800001f8: 0000a103 lw sp,0(ra) +800001fc: 01000e13 li t3,16 +80000200: 0020a023 sw sp,0(ra) +80000204: 01100e13 li t3,17 +80000208: 00008067 ret ... 
diff --git a/src/test/cpp/raw/machineCsr/build/machineCsrCompressed.hex b/src/test/cpp/raw/machineCsr/build/machineCsrCompressed.hex index 1546672..d6c33e7 100644 --- a/src/test/cpp/raw/machineCsr/build/machineCsrCompressed.hex +++ b/src/test/cpp/raw/machineCsr/build/machineCsrCompressed.hex @@ -1,27 +1,37 @@ :0200000480007A -:1000000071A00100130000001300000013000000A5 +:100000006F004009130000001300000013000000FF :100010001300000013000000130000001300000094 -:10002000732E2034631C0E00130FC0FFF32E103408 -:10003000B3FEEE01910E73901E3421A8B70E00801E -:10004000337FDE0163170F00F32E1034910E73908F -:100050001E34B70E0080938E3E006397CE01373C6E -:1000600001F0814C23209C01B70E0080938E7E000E -:100070006394CE0173504030B70E0080938EBE0063 -:100080006394CE017350403073002030054E7300EE -:100090000000094EA14273A00230A1427390423089 -:1000A000373C01F0854C23209C0101000100010038 -:1000B0000100010001000100010001000100010038 -:1000C00001000D4E930200087390423001000100C0 -:1000D00001000100010001000100114EB7120000F3 -:1000E0009382028073904230010001000100010000 -:1000F000010001000100154EB70110F0938101F4D9 -:1001000003A2010083A241001302F23F23A4410095 -:1001100023A65100194E13020008731042301D4EE1 -:1001200073005010214E854123A04100254E23909D -:100130004100294E03A201002D4E03920100314ED1 -:10014000354E8320000083200000394E73002020AC -:100150003D4EB70010F0938000F603A10000414E21 -:1001600023A02000454E8280000000000000000017 -:10017000000000000000000000000000000000007F -:040000058000008CEB +:10002000732E2034631E0E00130FC0FFF32E103406 +:10003000B3FEEE01938E4E0073901E346F00C0012C +:10004000B70E0080337FDE0163180F00F32E1034EB +:10005000938E4E0073901E34B70E0080938E3E0038 +:100060006398CE01373C01F0930C000023209C01E3 +:10007000B70E0080938E7E006394CE0173504030A3 +:10008000B70E0080938EBE006394CE017350403053 +:1000900073002030130E100073000000130E2000B8 +:1000A0009302800073A0023093028000739042306C +:1000B000373C01F0930C100023209C01130000003A +:1000C00013000000130000001300000013000000E4 
+:1000D00013000000130000001300000013000000D4 +:1000E000130000001300000013000000130E300086 +:1000F00093020008739042301300000013000000C8 +:1001000013000000130000001300000013000000A3 +:1001100013000000130E4000B7120000938202800B +:100120007390423013000000130000001300000021 +:100130001300000013000000130000001300000073 +:10014000130E5000B70110F0938101F403A20100D7 +:1001500083A241001302F23F23A4410023A65100D1 +:100160001300000013000000130000001300000043 +:100170001300000013000000130000001300000033 +:100180001300000013000000130000001300000023 +:100190001300000013000000130E6000130200089B +:1001A00073104230130E700073005010130E800055 +:1001B0009301100023A04100130E900023904100F2 +:1001C000130EA00003A20100130EB0000392010061 +:1001D000130EC000130ED000832000006F0020001B +:1001E00083200000130EE00073002020130EF000A7 +:1001F000B70010F0938000F603A10000130E000179 +:1002000023A02000130E10016780000000000000F2 +:1002100000000000000000000000000000000000DE +:0400000580000094E3 :00000001FF diff --git a/src/test/cpp/raw/machineCsr/src/crt.S b/src/test/cpp/raw/machineCsr/src/crt.S index bbe966d..91429db 100644 --- a/src/test/cpp/raw/machineCsr/src/crt.S +++ b/src/test/cpp/raw/machineCsr/src/crt.S @@ -102,6 +102,20 @@ _start: addi x4, x4, 1023 sw x4, 8(x3) sw x5, 12(x3) + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop li x28, 6 li x4, 0x080 csrw mie,x4 diff --git a/src/test/cpp/raw/smp/.gitignore b/src/test/cpp/raw/smp/.gitignore new file mode 100644 index 0000000..a7caa3b --- /dev/null +++ b/src/test/cpp/raw/smp/.gitignore @@ -0,0 +1,6 @@ +*.map +*.v +*.elf +*.o +*.hex +!*.bin \ No newline at end of file diff --git a/src/test/cpp/raw/smp/build/smp.asm b/src/test/cpp/raw/smp/build/smp.asm new file mode 100644 index 0000000..b0492f9 --- /dev/null +++ b/src/test/cpp/raw/smp/build/smp.asm @@ -0,0 +1,441 @@ + +build/smp.elf: file format elf32-littleriscv + + +Disassembly of section .crt_section: + +80000000 <_start>: +80000000: f1402473 csrr s0,mhartid +80000004: 
f80002b7 lui t0,0xf8000 +80000008: f1402373 csrr t1,mhartid +8000000c: 01031313 slli t1,t1,0x10 +80000010: 006282b3 add t0,t0,t1 +80000014: 0082a023 sw s0,0(t0) # f8000000 + +80000018 : +80000018: 00100513 li a0,1 +8000001c: 00000597 auipc a1,0x0 +80000020: 42058593 addi a1,a1,1056 # 8000043c +80000024: 00a5a02f amoadd.w zero,a0,(a1) + +80000028 : +80000028: 00000417 auipc s0,0x0 +8000002c: 41442403 lw s0,1044(s0) # 8000043c +80000030: 19000513 li a0,400 +80000034: 3fc000ef jal ra,80000430 +80000038: 00000497 auipc s1,0x0 +8000003c: 4044a483 lw s1,1028(s1) # 8000043c +80000040: fe8494e3 bne s1,s0,80000028 +80000044: f80002b7 lui t0,0xf8000 +80000048: 00428293 addi t0,t0,4 # f8000004 +8000004c: f1402373 csrr t1,mhartid +80000050: 01031313 slli t1,t1,0x10 +80000054: 006282b3 add t0,t0,t1 +80000058: 0092a023 sw s1,0(t0) + +8000005c : +8000005c: 00100513 li a0,1 +80000060: 290000ef jal ra,800002f0 +80000064: 00200513 li a0,2 +80000068: 288000ef jal ra,800002f0 +8000006c: 00300513 li a0,3 +80000070: 280000ef jal ra,800002f0 +80000074: 00400513 li a0,4 +80000078: 2f4000ef jal ra,8000036c +8000007c: 00500513 li a0,5 +80000080: 2ec000ef jal ra,8000036c +80000084: 00600513 li a0,6 +80000088: 2e4000ef jal ra,8000036c +8000008c: 00700513 li a0,7 +80000090: 260000ef jal ra,800002f0 +80000094: 00800513 li a0,8 +80000098: 2d4000ef jal ra,8000036c +8000009c: 00000197 auipc gp,0x0 +800000a0: 3ac1a183 lw gp,940(gp) # 80000448 + +800000a4 : +800000a4: 00000297 auipc t0,0x0 +800000a8: 06828293 addi t0,t0,104 # 8000010c +800000ac: 00000317 auipc t1,0x0 +800000b0: 48532a23 sw t0,1172(t1) # 80000540 +800000b4: 00000297 auipc t0,0x0 +800000b8: 06028293 addi t0,t0,96 # 80000114 +800000bc: 00000317 auipc t1,0x0 +800000c0: 48532423 sw t0,1160(t1) # 80000544 +800000c4: 00000297 auipc t0,0x0 +800000c8: 01428293 addi t0,t0,20 # 800000d8 +800000cc: 00000317 auipc t1,0x0 +800000d0: 46532e23 sw t0,1148(t1) # 80000548 +800000d4: 0640006f j 80000138 + +800000d8 : +800000d8: 00000297 auipc t0,0x0 
+800000dc: 03428293 addi t0,t0,52 # 8000010c +800000e0: 00000317 auipc t1,0x0 +800000e4: 46532023 sw t0,1120(t1) # 80000540 +800000e8: 00000297 auipc t0,0x0 +800000ec: 04028293 addi t0,t0,64 # 80000128 +800000f0: 00000317 auipc t1,0x0 +800000f4: 44532a23 sw t0,1108(t1) # 80000544 +800000f8: 00000297 auipc t0,0x0 +800000fc: 2f428293 addi t0,t0,756 # 800003ec +80000100: 00000317 auipc t1,0x0 +80000104: 44532423 sw t0,1096(t1) # 80000548 +80000108: 0300006f j 80000138 + +8000010c : +8000010c: 0004a983 lw s3,0(s1) +80000110: 0c40006f j 800001d4 + +80000114 : +80000114: 01242023 sw s2,0(s0) +80000118: 0120000f fence w,r +8000011c: 0004a983 lw s3,0(s1) +80000120: 05342023 sw s3,64(s0) +80000124: 0cc0006f j 800001f0 + +80000128 : +80000128: 01242023 sw s2,0(s0) +8000012c: 1204a9af lr.w.rl s3,(s1) +80000130: 05342023 sw s3,64(s0) +80000134: 0bc0006f j 800001f0 + +80000138 : +80000138: 00018513 mv a0,gp +8000013c: 00118193 addi gp,gp,1 +80000140: 22c000ef jal ra,8000036c +80000144: 00000297 auipc t0,0x0 +80000148: 3002a823 sw zero,784(t0) # 80000454 + +8000014c : +8000014c: 00018513 mv a0,gp +80000150: 00118193 addi gp,gp,1 +80000154: 218000ef jal ra,8000036c +80000158: 00000297 auipc t0,0x0 +8000015c: 2fc2a283 lw t0,764(t0) # 80000454 +80000160: 03200313 li t1,50 +80000164: 1662da63 bge t0,t1,800002d8 +80000168: 00000297 auipc t0,0x0 +8000016c: 2e42a283 lw t0,740(t0) # 8000044c +80000170: 00000317 auipc t1,0x0 +80000174: 2e032303 lw t1,736(t1) # 80000450 +80000178: 06628c63 beq t0,t1,800001f0 +8000017c: f14022f3 csrr t0,mhartid +80000180: 00000317 auipc t1,0x0 +80000184: 2cc32303 lw t1,716(t1) # 8000044c +80000188: 00000417 auipc s0,0x0 +8000018c: 33840413 addi s0,s0,824 # 800004c0 +80000190: 00000497 auipc s1,0x0 +80000194: 33448493 addi s1,s1,820 # 800004c4 +80000198: 02628863 beq t0,t1,800001c8 +8000019c: 00000317 auipc t1,0x0 +800001a0: 2b432303 lw t1,692(t1) # 80000450 +800001a4: 00000417 auipc s0,0x0 +800001a8: 32040413 addi s0,s0,800 # 800004c4 +800001ac: 00000497 
auipc s1,0x0 +800001b0: 31448493 addi s1,s1,788 # 800004c0 +800001b4: 00628a63 beq t0,t1,800001c8 + +800001b8 : +800001b8: 00018513 mv a0,gp +800001bc: 00118193 addi gp,gp,1 +800001c0: 1ac000ef jal ra,8000036c +800001c4: 02c0006f j 800001f0 + +800001c8 : +800001c8: 00000297 auipc t0,0x0 +800001cc: 3782a283 lw t0,888(t0) # 80000540 +800001d0: 000280e7 jalr t0 + +800001d4 : +800001d4: 29a00913 li s2,666 +800001d8: 00018513 mv a0,gp +800001dc: 00118193 addi gp,gp,1 +800001e0: 18c000ef jal ra,8000036c +800001e4: 00000297 auipc t0,0x0 +800001e8: 3602a283 lw t0,864(t0) # 80000544 +800001ec: 000280e7 jalr t0 + +800001f0 : +800001f0: 0330000f fence rw,rw +800001f4: 00018513 mv a0,gp +800001f8: 00118193 addi gp,gp,1 +800001fc: 170000ef jal ra,8000036c +80000200: f14022f3 csrr t0,mhartid +80000204: f40294e3 bnez t0,8000014c + +80000208 : +80000208: 00000297 auipc t0,0x0 +8000020c: 2442a283 lw t0,580(t0) # 8000044c +80000210: 00000317 auipc t1,0x0 +80000214: 24032303 lw t1,576(t1) # 80000450 +80000218: 04628263 beq t0,t1,8000025c +8000021c: 00000517 auipc a0,0x0 +80000220: 2e852503 lw a0,744(a0) # 80000504 +80000224: f80002b7 lui t0,0xf8000 +80000228: 01428293 addi t0,t0,20 # f8000014 +8000022c: f1402373 csrr t1,mhartid +80000230: 01031313 slli t1,t1,0x10 +80000234: 006282b3 add t0,t0,t1 +80000238: 00a2a023 sw a0,0(t0) +8000023c: 00000517 auipc a0,0x0 +80000240: 2c452503 lw a0,708(a0) # 80000500 +80000244: f80002b7 lui t0,0xf8000 +80000248: 01428293 addi t0,t0,20 # f8000014 +8000024c: f1402373 csrr t1,mhartid +80000250: 01031313 slli t1,t1,0x10 +80000254: 006282b3 add t0,t0,t1 +80000258: 00a2a023 sw a0,0(t0) + +8000025c : +8000025c: f14022f3 csrr t0,mhartid +80000260: ee0296e3 bnez t0,8000014c +80000264: 00000297 auipc t0,0x0 +80000268: 2402ae23 sw zero,604(t0) # 800004c0 +8000026c: 00000297 auipc t0,0x0 +80000270: 2402ac23 sw zero,600(t0) # 800004c4 +80000274: 00000417 auipc s0,0x0 +80000278: 1c842403 lw s0,456(s0) # 8000043c +8000027c: 00000297 auipc t0,0x0 +80000280: 
1d42a283 lw t0,468(t0) # 80000450 +80000284: 00128293 addi t0,t0,1 +80000288: 00000317 auipc t1,0x0 +8000028c: 1c532423 sw t0,456(t1) # 80000450 +80000290: 04829063 bne t0,s0,800002d0 +80000294: 00000317 auipc t1,0x0 +80000298: 1a032e23 sw zero,444(t1) # 80000450 +8000029c: 00000297 auipc t0,0x0 +800002a0: 1b02a283 lw t0,432(t0) # 8000044c +800002a4: 00128293 addi t0,t0,1 +800002a8: 00000317 auipc t1,0x0 +800002ac: 1a532223 sw t0,420(t1) # 8000044c +800002b0: 02829063 bne t0,s0,800002d0 +800002b4: 00000317 auipc t1,0x0 +800002b8: 18032c23 sw zero,408(t1) # 8000044c +800002bc: 00000297 auipc t0,0x0 +800002c0: 1982a283 lw t0,408(t0) # 80000454 +800002c4: 00128293 addi t0,t0,1 +800002c8: 00000317 auipc t1,0x0 +800002cc: 18532623 sw t0,396(t1) # 80000454 + +800002d0 : +800002d0: 0130000f fence w,rw +800002d4: e79ff06f j 8000014c + +800002d8 : +800002d8: 00000417 auipc s0,0x0 +800002dc: 27042403 lw s0,624(s0) # 80000548 +800002e0: 00018513 mv a0,gp +800002e4: 00118193 addi gp,gp,1 +800002e8: 084000ef jal ra,8000036c +800002ec: 000400e7 jalr s0 + +800002f0 : +800002f0: f80002b7 lui t0,0xf8000 +800002f4: 00c28293 addi t0,t0,12 # f800000c +800002f8: f1402373 csrr t1,mhartid +800002fc: 01031313 slli t1,t1,0x10 +80000300: 006282b3 add t0,t0,t1 +80000304: 00a2a023 sw a0,0(t0) +80000308: 00000e97 auipc t4,0x0 +8000030c: 13ceae83 lw t4,316(t4) # 80000444 +80000310: 00000297 auipc t0,0x0 +80000314: 13028293 addi t0,t0,304 # 80000440 +80000318: 00100313 li t1,1 +8000031c: 0062a2af amoadd.w t0,t1,(t0) +80000320: 00128293 addi t0,t0,1 +80000324: 00000317 auipc t1,0x0 +80000328: 11832303 lw t1,280(t1) # 8000043c +8000032c: 00629c63 bne t0,t1,80000344 +80000330: 001e8293 addi t0,t4,1 +80000334: 00000317 auipc t1,0x0 +80000338: 10032623 sw zero,268(t1) # 80000440 +8000033c: 00000317 auipc t1,0x0 +80000340: 10532423 sw t0,264(t1) # 80000444 + +80000344 : +80000344: 00000297 auipc t0,0x0 +80000348: 1002a283 lw t0,256(t0) # 80000444 +8000034c: ffd28ce3 beq t0,t4,80000344 +80000350: 
f80002b7 lui t0,0xf8000 +80000354: 01028293 addi t0,t0,16 # f8000010 +80000358: f1402373 csrr t1,mhartid +8000035c: 01031313 slli t1,t1,0x10 +80000360: 006282b3 add t0,t0,t1 +80000364: 00a2a023 sw a0,0(t0) +80000368: 00008067 ret + +8000036c : +8000036c: f80002b7 lui t0,0xf8000 +80000370: 00c28293 addi t0,t0,12 # f800000c +80000374: f1402373 csrr t1,mhartid +80000378: 01031313 slli t1,t1,0x10 +8000037c: 006282b3 add t0,t0,t1 +80000380: 00a2a023 sw a0,0(t0) +80000384: 00000e97 auipc t4,0x0 +80000388: 0c0eae83 lw t4,192(t4) # 80000444 +8000038c: 00000297 auipc t0,0x0 +80000390: 0b428293 addi t0,t0,180 # 80000440 + +80000394 : +80000394: 1002a32f lr.w t1,(t0) +80000398: 00130313 addi t1,t1,1 +8000039c: 1862a3af sc.w t2,t1,(t0) +800003a0: fe039ae3 bnez t2,80000394 +800003a4: 00000297 auipc t0,0x0 +800003a8: 0982a283 lw t0,152(t0) # 8000043c +800003ac: 00629c63 bne t0,t1,800003c4 +800003b0: 001e8293 addi t0,t4,1 +800003b4: 00000317 auipc t1,0x0 +800003b8: 08032623 sw zero,140(t1) # 80000440 +800003bc: 00000317 auipc t1,0x0 +800003c0: 08532423 sw t0,136(t1) # 80000444 + +800003c4 : +800003c4: 00000297 auipc t0,0x0 +800003c8: 0802a283 lw t0,128(t0) # 80000444 +800003cc: ffd28ce3 beq t0,t4,800003c4 +800003d0: f80002b7 lui t0,0xf8000 +800003d4: 01028293 addi t0,t0,16 # f8000010 +800003d8: f1402373 csrr t1,mhartid +800003dc: 01031313 slli t1,t1,0x10 +800003e0: 006282b3 add t0,t0,t1 +800003e4: 00a2a023 sw a0,0(t0) +800003e8: 00008067 ret + +800003ec : +800003ec: 00000413 li s0,0 +800003f0: f80002b7 lui t0,0xf8000 +800003f4: 00828293 addi t0,t0,8 # f8000008 +800003f8: f1402373 csrr t1,mhartid +800003fc: 01031313 slli t1,t1,0x10 +80000400: 006282b3 add t0,t0,t1 +80000404: 0082a023 sw s0,0(t0) +80000408: 0240006f j 8000042c + +8000040c : +8000040c: 00100413 li s0,1 +80000410: f80002b7 lui t0,0xf8000 +80000414: 00828293 addi t0,t0,8 # f8000008 +80000418: f1402373 csrr t1,mhartid +8000041c: 01031313 slli t1,t1,0x10 +80000420: 006282b3 add t0,t0,t1 +80000424: 0082a023 sw s0,0(t0) 
+80000428: 0040006f j 8000042c + +8000042c : +8000042c: 0000006f j 8000042c + +80000430 : +80000430: fff50513 addi a0,a0,-1 +80000434: fe051ee3 bnez a0,80000430 +80000438: 00008067 ret + +8000043c : +8000043c: 0000 unimp + ... + +80000440 : +80000440: 0000 unimp + ... + +80000444 : +80000444: 0000 unimp + ... + +80000448 : +80000448: 1000 addi s0,sp,32 + ... + +8000044c : +8000044c: 0000 unimp + ... + +80000450 : +80000450: 0000 unimp + ... + +80000454 : +80000454: 0000 unimp +80000456: 0000 unimp +80000458: 00000013 nop +8000045c: 00000013 nop +80000460: 00000013 nop +80000464: 00000013 nop +80000468: 00000013 nop +8000046c: 00000013 nop +80000470: 00000013 nop +80000474: 00000013 nop +80000478: 00000013 nop +8000047c: 00000013 nop +80000480: 00000013 nop +80000484: 00000013 nop +80000488: 00000013 nop +8000048c: 00000013 nop +80000490: 00000013 nop +80000494: 00000013 nop +80000498: 00000013 nop +8000049c: 00000013 nop +800004a0: 00000013 nop +800004a4: 00000013 nop +800004a8: 00000013 nop +800004ac: 00000013 nop +800004b0: 00000013 nop +800004b4: 00000013 nop +800004b8: 00000013 nop +800004bc: 00000013 nop + +800004c0 : +800004c0: 0000 unimp + ... + +800004c4 : +800004c4: 0000 unimp +800004c6: 0000 unimp +800004c8: 00000013 nop +800004cc: 00000013 nop +800004d0: 00000013 nop +800004d4: 00000013 nop +800004d8: 00000013 nop +800004dc: 00000013 nop +800004e0: 00000013 nop +800004e4: 00000013 nop +800004e8: 00000013 nop +800004ec: 00000013 nop +800004f0: 00000013 nop +800004f4: 00000013 nop +800004f8: 00000013 nop +800004fc: 00000013 nop + +80000500 : +80000500: 0000 unimp + ... 
+ +80000504 : +80000504: 0000 unimp +80000506: 0000 unimp +80000508: 00000013 nop +8000050c: 00000013 nop +80000510: 00000013 nop +80000514: 00000013 nop +80000518: 00000013 nop +8000051c: 00000013 nop +80000520: 00000013 nop +80000524: 00000013 nop +80000528: 00000013 nop +8000052c: 00000013 nop +80000530: 00000013 nop +80000534: 00000013 nop +80000538: 00000013 nop +8000053c: 00000013 nop + +80000540 : +80000540: 0000 unimp + ... + +80000544 : +80000544: 0000 unimp + ... + +80000548 : + ... diff --git a/src/test/cpp/raw/smp/build/smp.bin b/src/test/cpp/raw/smp/build/smp.bin new file mode 100755 index 0000000..5ce6918 Binary files /dev/null and b/src/test/cpp/raw/smp/build/smp.bin differ diff --git a/src/test/cpp/raw/smp/makefile b/src/test/cpp/raw/smp/makefile new file mode 100644 index 0000000..0886c1b --- /dev/null +++ b/src/test/cpp/raw/smp/makefile @@ -0,0 +1,5 @@ +PROJ_NAME=smp + +ATOMIC=yes + +include ../common/asm.mk \ No newline at end of file diff --git a/src/test/cpp/raw/smp/src/crt.S b/src/test/cpp/raw/smp/src/crt.S new file mode 100644 index 0000000..9a72bd7 --- /dev/null +++ b/src/test/cpp/raw/smp/src/crt.S @@ -0,0 +1,287 @@ +#define CONSISTENCY_REDO_COUNT 50 + + +#define REPORT_OFFSET 0xF8000000 +#define REPORT_THREAD_ID 0x00 +#define REPORT_THREAD_COUNT 0x04 +#define REPORT_END 0x08 +#define REPORT_BARRIER_START 0x0C +#define REPORT_BARRIER_END 0x10 +#define REPORT_CONSISTENCY_VALUES 0x14 + +#define report(reg, id) \ + li t0, REPORT_OFFSET+id; \ + csrr t1, mhartid; \ + slli t1, t1, 16; \ + add t0, t0, t1; \ + sw reg, 0(t0); \ + +_start: + csrr s0, mhartid + report(s0, REPORT_THREAD_ID) + + +count_thread_start: + //Count up threads + li a0, 1 + la a1, thread_count + amoadd.w x0, a0, (a1) + +count_thread_wait: + //Wait everybody + lw s0, thread_count + li a0, 400 + call sleep + lw s1, thread_count + bne s1, s0, count_thread_wait + report(s1, REPORT_THREAD_COUNT) + + +barrier_amo_test: + li a0, 1 + call barrier_amo + li a0, 2 + call barrier_amo + li 
a0, 3 + call barrier_amo + + li a0, 4 + call barrier_lrsc + li a0, 5 + call barrier_lrsc + li a0, 6 + call barrier_lrsc + + li a0, 7 + call barrier_amo + li a0, 8 + call barrier_lrsc + + + lw gp, barrier_allocator + +consistancy_test1: + la t0, consistancy_init_load + sw t0, consistancy_init_call, t1 + la t0, consistancy_do_simple_fence + sw t0, consistancy_do_call, t1 + la t0, consistancy_test2 + sw t0, consistancy_done_call, t1 + j consistancy_start + +consistancy_test2: + la t0, consistancy_init_load + sw t0, consistancy_init_call, t1 + la t0, consistancy_do_rl_fence + sw t0, consistancy_do_call, t1 + la t0, success + sw t0, consistancy_done_call, t1 + j consistancy_start + + +consistancy_init_load: + lw s3, (s1) //Help getting the cache loaded for the consistancy check + j consistancy_do_init_done + +consistancy_do_simple_fence: + //Consistancy check : write to read ordering on two thread + sw s2, (s0) + fence w,r + lw s3, (s1) + sw s3, 64(s0) + j consistancy_join + +consistancy_do_rl_fence: + //Consistancy check : write to read ordering on two thread + sw s2, (s0) + lr.w.rl s3, (s1) + sw s3, 64(s0) + j consistancy_join + + +consistancy_start: + mv a0, gp + addi gp, gp, 1 + call barrier_lrsc + sw x0, consistancy_all_tested, t0 +consistancy_loop: + //Sync + mv a0, gp + addi gp, gp, 1 + call barrier_lrsc + + //all passed ? 
+ lw t0, consistancy_all_tested + li t1, CONSISTENCY_REDO_COUNT + bge t0, t1, consistancy_passed + + //identify who is A, who is B + lw t0, consistancy_a_hart + lw t1, consistancy_b_hart + beq t0, t1, consistancy_join + csrr t0, mhartid + lw t1, consistancy_a_hart + la s0, consistancy_a_value + la s1, consistancy_b_value + beq t0, t1, consistancy_do + lw t1, consistancy_b_hart + la s0, consistancy_b_value + la s1, consistancy_a_value + beq t0, t1, consistancy_do + +consistancy_hart_not_involved: + mv a0, gp + addi gp, gp, 1 + call barrier_lrsc + j consistancy_join + +consistancy_do: + lw t0, consistancy_init_call + jalr t0 +consistancy_do_init_done: + li s2, 666 + mv a0, gp + addi gp, gp, 1 + call barrier_lrsc + + + lw t0, consistancy_do_call + jalr t0 + +consistancy_join: + fence rw, rw //ensure updated values + mv a0, gp + addi gp, gp, 1 + call barrier_lrsc + csrr t0, mhartid + bnez t0, consistancy_loop + +consistancy_assert: + lw t0, consistancy_a_hart + lw t1, consistancy_b_hart + beq t0, t1, consistancy_increment + lw a0, consistancy_a_readed + report(a0, REPORT_CONSISTENCY_VALUES) + lw a0, consistancy_b_readed + report(a0, REPORT_CONSISTENCY_VALUES) + +consistancy_increment: + csrr t0, mhartid + bnez t0, consistancy_loop + sw x0, (consistancy_a_value), t0 + sw x0, (consistancy_b_value), t0 + lw s0,thread_count + lw t0,consistancy_b_hart + addi t0, t0, 1 + sw t0, consistancy_b_hart, t1 + bne t0, s0, consistancy_increment_fence + sw x0, consistancy_b_hart, t1 + lw t0,consistancy_a_hart + addi t0, t0, 1 + sw t0, consistancy_a_hart, t1 + bne t0, s0, consistancy_increment_fence + sw x0, consistancy_a_hart, t1 + lw t0, consistancy_all_tested + addi t0, t0, 1 + sw t0, consistancy_all_tested, t1 +consistancy_increment_fence: + fence w, rw + j consistancy_loop + +consistancy_passed: + lw s0, consistancy_done_call + mv a0, gp + addi gp, gp, 1 + call barrier_lrsc + jalr s0 + + + +#define ENTRY_PHASE t4 +barrier_amo: + report(a0, REPORT_BARRIER_START) + lw ENTRY_PHASE, 
barrier_phase + la t0, barrier_value + li t1, 1 + amoadd.w t0, t1, (t0) + addi t0, t0, 1 + lw t1, thread_count + bne t0, t1, barrier_amo_wait + addi t0,ENTRY_PHASE,1 + sw x0, barrier_value, t1 + sw t0, barrier_phase, t1 +barrier_amo_wait: + lw t0, barrier_phase + beq t0, ENTRY_PHASE, barrier_amo_wait + report(a0, REPORT_BARRIER_END) + ret + +barrier_lrsc: + report(a0, REPORT_BARRIER_START) + lw ENTRY_PHASE, barrier_phase + la t0, barrier_value +barrier_lrsc_try: + lr.w t1, (t0) + addi t1, t1, 1 + sc.w t2, t1, (t0) + bnez t2, barrier_lrsc_try + lw t0, thread_count + bne t0, t1, barrier_lrsc_wait + addi t0,ENTRY_PHASE,1 + sw x0, barrier_value, t1 + sw t0, barrier_phase, t1 +barrier_lrsc_wait: + lw t0, barrier_phase + beq t0, ENTRY_PHASE, barrier_lrsc_wait + report(a0, REPORT_BARRIER_END) + ret + + + + +success: + li s0, 0 + report(s0, REPORT_END) + j end + +failure: + li s0, 1 + report(s0, REPORT_END) + j end + +end: + j end + + +sleep: + addi a0, a0, -1 + bnez a0, sleep + ret + + +thread_count: .word 0 + +.align 6 //Same cache line +barrier_value: .word 0 +barrier_phase: .word 0 +barrier_allocator: .word 0x1000 + +consistancy_a_hart: .word 0 +consistancy_b_hart: .word 0 +consistancy_all_tested: .word 0 + + +nop;nop;nop;nop;nop;nop;nop;nop; +nop;nop;nop;nop;nop;nop;nop;nop; +.align 6 //Same cache line +consistancy_a_value: .word 0 +consistancy_b_value: .word 0 + +.align 6 //Same cache line +consistancy_b_readed: .word 0 +consistancy_a_readed: .word 0 + +.align 6 //Same cache line +consistancy_init_call: .word 0 +consistancy_do_call: .word 0 +consistancy_done_call: .word 0 \ No newline at end of file diff --git a/src/test/cpp/raw/smp/src/ld b/src/test/cpp/raw/smp/src/ld new file mode 100644 index 0000000..93d8de8 --- /dev/null +++ b/src/test/cpp/raw/smp/src/ld @@ -0,0 +1,16 @@ +OUTPUT_ARCH( "riscv" ) + +MEMORY { + onChipRam (W!RX)/*(RX)*/ : ORIGIN = 0x80000000, LENGTH = 128K +} + +SECTIONS +{ + + .crt_section : + { + . 
= ALIGN(4); + *crt.o(.text) + } > onChipRam + +} diff --git a/src/test/cpp/regression/main.cpp b/src/test/cpp/regression/main.cpp index d983e8e..5468545 100644 --- a/src/test/cpp/regression/main.cpp +++ b/src/test/cpp/regression/main.cpp @@ -376,6 +376,7 @@ public: bool lrscReserved; + uint32_t lrscReservedAddress; RiscvGolden() { pc = 0x80000000; @@ -389,7 +390,7 @@ public: mcause.raw = 0; mbadaddr = 0; mepc = 0; - misa = 0; //TODO + misa = 0x40041101; //TODO status.raw = 0; status.mpp = 3; status.spp = 1; @@ -400,6 +401,7 @@ public: ipSoft = 0; ipInput = 0; stepCounter = 0; + sbadaddr = 42; lrscReserved = false; } @@ -464,7 +466,6 @@ public: cout << hex << " a7=0x" << regs[17] << " a0=0x" << regs[10] << " a1=0x" << regs[11] << " a2=0x" << regs[12] << dec << endl; } #endif - lrscReserved = false; //Check leguality of the interrupt if(interrupt) { bool hit = false; @@ -513,7 +514,7 @@ public: pcWrite(xtvec.base << 2); if(interrupt) livenessInterrupt = 0; - if(!interrupt) step(); //As VexRiscv instruction which trap do not reach writeback stage fire +// if(!interrupt) step(); //As VexRiscv instruction which trap do not reach writeback stage fire } uint32_t currentInstruction; @@ -540,6 +541,7 @@ public: case MISA: *value = misa; break; case MEDELEG: *value = medeleg; break; case MIDELEG: *value = mideleg; break; + case MHARTID: *value = 0; break; case SSTATUS: *value = status.raw & 0xC0133; break; case SIP: *value = getIp().raw & 0x333; break; @@ -578,7 +580,7 @@ public: case MEPC: mepc = value; break; case MSCRATCH: mscratch = value; break; case MISA: misa = value; break; - case MEDELEG: medeleg = value; break; + case MEDELEG: medeleg = value & (~0x8); break; case MIDELEG: mideleg = value; break; case SSTATUS: maskedWrite(status.raw, value,0xC0133); break; @@ -835,7 +837,6 @@ public: status.mpie = 1; status.mpp = 0; pcWrite(mepc); - lrscReserved = false; }break; case 0x10200073:{ //SRET if(privilege < 1){ ilegalInstruction(); return;} @@ -844,7 +845,6 @@ public: 
status.spie = 1; status.spp = 0; pcWrite(sepc); - lrscReserved = false; }break; case 0x00000073:{ //ECALL trap(0, 8+privilege, 0x00000073); //To follow the VexRiscv area saving implementation @@ -894,6 +894,7 @@ public: trap(0, 5, address); } else { lrscReserved = true; + lrscReservedAddress = pAddr; rfWrite(rd32, data); pcWrite(pc + 4); } @@ -905,10 +906,15 @@ public: trap(0, 6, address); } else { if(v2p(address, &pAddr, WRITE)){ trap(0, 15, address); return; } - bool hit = lrscReserved; + #ifdef DBUS_EXCLUSIVE + bool hit = lrscReserved && lrscReservedAddress == pAddr; + #else + bool hit = lrscReserved; + #endif if(hit){ dWrite(pAddr, 4, i32_rs2); } + lrscReserved = false; rfWrite(rd32, !hit); pcWrite(pc + 4); } @@ -922,6 +928,10 @@ public: int32_t src = i32_rs2; int32_t readValue; + #ifdef DBUS_EXCLUSIVE + lrscReserved = false; + #endif + uint32_t pAddr; if(v2p(addr, &pAddr, READ_WRITE)){ trap(0, 15, addr); return; } if(dRead(pAddr, 4, (uint32_t*)&readValue)){ @@ -1098,6 +1108,7 @@ public: #ifdef TRACE VerilatedVcdC* tfp; #endif + bool allowInvalidate = true; uint32_t seed; @@ -1250,7 +1261,7 @@ public: top = new VVexRiscv; #ifdef TRACE_ACCESS regTraces.open (name + ".regTrace"); - memTraces.open (name + ".memTrace");hh + memTraces.open (name + ".memTrace"); #endif logTraces.open (name + ".logTrace"); debugLog.open (name + ".debugTrace"); @@ -1297,6 +1308,14 @@ public: return this; } + Workspace* withInvalidation(){ + allowInvalidate = true; + return this; + } + Workspace* withoutInvalidation(){ + allowInvalidate = false; + return this; + } virtual bool isPerifRegion(uint32_t addr) { return false; } virtual bool isMmuRegion(uint32_t addr) { return true;} virtual void iBusAccess(uint32_t addr, uint32_t *data, bool *error) { @@ -1325,7 +1344,7 @@ public: #endif ) << #endif - " : WRITE mem" << (1 << size) << "[" << addr << "] = " << *data << endl; + " : WRITE mem" << hex << (1 << size) << "[" << addr << "] = " << *data << dec << endl; for(uint32_t b = 0;b < (1 << 
size);b++){ uint32_t offset = (addr+b)&0x3; if((mask >> offset) & 1 == 1) @@ -1339,6 +1358,7 @@ public: *data &= ~(0xFF << (offset*8)); *data |= mem[addr + b] << (offset*8); } + /* memTraces << #ifdef TRACE_WITH_TIME (currentTime @@ -1347,7 +1367,7 @@ public: #endif ) << #endif - " : READ mem" << (1 << size) << "[" << addr << "] = " << *data << endl; + " : READ mem" << (1 << size) << "[" << addr << "] = " << *data << endl;*/ } } @@ -1409,10 +1429,13 @@ public: virtual void pass(){ throw success();} virtual void fail(){ throw std::exception();} virtual void fillSimELements(); - void dump(int i){ + void dump(uint64_t i){ #ifdef TRACE - if(i == TRACE_START && i != 0) cout << "START TRACE" << endl; + if(i == TRACE_START && i != 0) cout << "**" << endl << "**" << endl << "**" << endl << "**" << endl << "**" << endl << "START TRACE" << endl; if(i >= TRACE_START) tfp->dump(i); + #ifdef TRACE_SPORADIC + else if(i % 1000000 < 100) tfp->dump(i); + #endif #endif } @@ -1520,7 +1543,7 @@ public: currentTime = i; #ifdef FLOW_INFO - if(i % 2000000 == 0) cout << "PROGRESS TRACE_START=" << i << endl; + if(i % 2000000 == 0) cout << "**" << endl << "**" << endl << "**" << endl << "**" << endl << "**" << endl << "PROGRESS TRACE_START=" << i << endl; #endif @@ -1607,6 +1630,14 @@ public: } } + #ifdef CSR + if(top->VexRiscv->CsrPlugin_hadException){ + if(riscvRefEnable) { + riscvRef.step(); + } + } + #endif + for(SimElement* simElement : simElements) simElement->preCycle(); dump(i + 1); @@ -1769,7 +1800,8 @@ public: uint32_t regFileWriteRefIndex = 0; - char *target = "PROJECT EXECUTION SUCCESSFUL", *hit = target; + const char *target = "PROJECT EXECUTION SUCCESSFUL"; + const char *hit = target; ZephyrRegression(string name) : WorkspaceRegression(name) { cout << endl << endl; @@ -1778,7 +1810,7 @@ public: virtual void dutPutChar(char c){ if(*hit == c) hit++; else hit = target; - if(*hit == NULL) { + if(*hit == 0) { cout << endl << "T=" << i <fail(); } #endif - 
ws->iBusAccess(address,&top->iBus_rsp_payload_data,&error); + error = false; + for(int idx = 0;idx < IBUS_DATA_WIDTH/32;idx++){ + bool localError = false; + ws->iBusAccess(address+idx*4,((uint32_t*)&top->iBus_rsp_payload_data)+idx,&localError); + error |= localError; + } top->iBus_rsp_payload_error = error; - pendingCount--; - address = address + 4; + pendingCount-=IBUS_DATA_WIDTH/32; + address = address + IBUS_DATA_WIDTH/8; top->iBus_rsp_valid = 1; } if(ws->iStall) top->iBus_cmd_ready = VL_RANDOM_I(7) < 100 && pendingCount == 0; @@ -2316,16 +2353,28 @@ public: #ifdef DBUS_CACHED //#include "VVexRiscv_DataCache.h" +#include + +struct DBusCachedTask{ + char data[DBUS_DATA_WIDTH/8]; + bool error; + bool last; + bool exclusive; +}; class DBusCached : public SimElement{ public: - uint32_t address; - bool error_next = false; - uint32_t pendingCount = 0; - bool wr; + queue rsps; + queue invalidationHint; + + bool reservationValid = false; + uint32_t reservationAddress; + uint32_t pendingSync = 0; Workspace *ws; VVexRiscv* top; + DBusCachedTask rsp; + DBusCached(Workspace* ws){ this->ws = ws; this->top = ws->top; @@ -2334,54 +2383,135 @@ public: virtual void onReset(){ top->dBus_cmd_ready = 1; top->dBus_rsp_valid = 0; + #ifdef DBUS_INVALIDATE + top->dBus_inv_valid = 0; + top->dBus_ack_ready = 0; + top->dBus_sync_valid = 0; + #endif } virtual void preCycle(){ - VL_IN8(io_cpu_execute_isValid,0,0); - VL_IN8(io_cpu_execute_isStuck,0,0); - VL_IN8(io_cpu_execute_args_kind,0,0); - VL_IN8(io_cpu_execute_args_wr,0,0); - VL_IN8(io_cpu_execute_args_size,1,0); - VL_IN8(io_cpu_execute_args_forceUncachedAccess,0,0); - VL_IN8(io_cpu_execute_args_clean,0,0); - VL_IN8(io_cpu_execute_args_invalidate,0,0); - VL_IN8(io_cpu_execute_args_way,0,0); - -// if(top->VexRiscv->dataCache_1->io_cpu_execute_isValid && !top->VexRiscv->dataCache_1->io_cpu_execute_isStuck -// && top->VexRiscv->dataCache_1->io_cpu_execute_args_wr){ -// if(top->VexRiscv->dataCache_1->io_cpu_execute_args_address == 
0x80025978) -// cout << "WR 0x80025978 = " << hex << setw(8) << top->VexRiscv->dataCache_1->io_cpu_execute_args_data << endl; -// if(top->VexRiscv->dataCache_1->io_cpu_execute_args_address == 0x8002596c) -// cout << "WR 0x8002596c = " << hex << setw(8) << top->VexRiscv->dataCache_1->io_cpu_execute_args_data << endl; -// } if (top->dBus_cmd_valid && top->dBus_cmd_ready) { - if(pendingCount == 0){ - pendingCount = top->dBus_cmd_payload_length+1; - address = top->dBus_cmd_payload_address; - wr = top->dBus_cmd_payload_wr; - } - if(top->dBus_cmd_payload_wr){ - ws->dBusAccess(address,top->dBus_cmd_payload_wr,2,top->dBus_cmd_payload_mask,&top->dBus_cmd_payload_data,&error_next); - address += 4; - pendingCount--; - } + if(top->dBus_cmd_payload_wr){ + #ifdef DBUS_INVALIDATE + pendingSync += 1; + #endif + #ifndef DBUS_EXCLUSIVE + bool error; + ws->dBusAccess(top->dBus_cmd_payload_address,1,2,top->dBus_cmd_payload_mask,&top->dBus_cmd_payload_data,&error); + #else + bool cancel = false, error = false; + if(top->dBus_cmd_payload_exclusive){ + bool hit = reservationValid && reservationAddress == top->dBus_cmd_payload_address; + rsp.exclusive = hit; + cancel = !hit; + reservationValid = false; + } + if(!cancel) { + for(int idx = 0;idx < 1;idx++){ + bool localError = false; + ws->dBusAccess(top->dBus_cmd_payload_address+idx*4,1,2,top->dBus_cmd_payload_mask >> idx*4,((uint32_t*)&top->dBus_cmd_payload_data)+idx, &localError); + error |= localError; + + //printf("%d ", (int)localError); + } + } + + // printf("%x %d\n", top->dBus_cmd_payload_address, (int)error); + rsp.last = true; + rsp.error = error; + rsps.push(rsp); + #endif + } else { + bool error = false; + uint32_t beatCount = top->dBus_cmd_payload_length*32/DBUS_DATA_WIDTH; + for(int beat = 0;beat <= beatCount;beat++){ + if(top->dBus_cmd_payload_length == 0){ + uint32_t sel = (top->dBus_cmd_payload_address >> 2) & (DBUS_DATA_WIDTH/32-1); + ws->dBusAccess(top->dBus_cmd_payload_address,0,2,0,((uint32_t*)rsp.data) + sel,&error); 
+ } else { + for(int idx = 0;idx < DBUS_DATA_WIDTH/32;idx++){ + bool localError = false; + ws->dBusAccess(top->dBus_cmd_payload_address + beat * DBUS_DATA_WIDTH/8 + idx*4,0,2,0,((uint32_t*)rsp.data)+idx, &localError); + error |= localError; + } + } + rsp.last = beat == beatCount; + #ifdef DBUS_EXCLUSIVE + if(top->dBus_cmd_payload_exclusive){ + rsp.exclusive = true; + reservationValid = true; + reservationAddress = top->dBus_cmd_payload_address; + } + #endif + rsp.error = error; + rsps.push(rsp); + } + + #ifdef DBUS_INVALIDATE + if(ws->allowInvalidate){ + if(VL_RANDOM_I(7) < 10){ + invalidationHint.push(top->dBus_cmd_payload_address + VL_RANDOM_I(5)); + } + } + #endif + } } + #ifdef DBUS_INVALIDATE + if(top->dBus_sync_valid && top->dBus_sync_ready){ + pendingSync -= 1; + } + #endif } virtual void postCycle(){ - if(pendingCount != 0 && !wr && (!ws->dStall || VL_RANDOM_I(7) < 100)){ - ws->dBusAccess(address,0,2,0,&top->dBus_rsp_payload_data,&error_next); - top->dBus_rsp_payload_error = error_next; + + if(!rsps.empty() && (!ws->dStall || VL_RANDOM_I(7) < 100)){ + DBusCachedTask rsp = rsps.front(); + rsps.pop(); top->dBus_rsp_valid = 1; - address += 4; - pendingCount--; + top->dBus_rsp_payload_error = rsp.error; + for(int idx = 0;idx < DBUS_DATA_WIDTH/32;idx++){ + ((uint32_t*)&top->dBus_rsp_payload_data)[idx] = ((uint32_t*)rsp.data)[idx]; + } + top->dBus_rsp_payload_last = rsp.last; + #ifdef DBUS_EXCLUSIVE + top->dBus_rsp_payload_exclusive = rsp.exclusive; + #endif } else{ top->dBus_rsp_valid = 0; - top->dBus_rsp_payload_data = VL_RANDOM_I(32); + for(int idx = 0;idx < DBUS_DATA_WIDTH/32;idx++){ + ((uint32_t*)&top->dBus_rsp_payload_data)[idx] = VL_RANDOM_I(32); + } top->dBus_rsp_payload_error = VL_RANDOM_I(1); + top->dBus_rsp_payload_last = VL_RANDOM_I(1); + #ifdef DBUS_EXCLUSIVE + top->dBus_rsp_payload_exclusive = VL_RANDOM_I(1); + #endif } + top->dBus_cmd_ready = (ws->dStall ? 
VL_RANDOM_I(7) < 100 : 1); + + #ifdef DBUS_INVALIDATE + if(ws->allowInvalidate){ + if(top->dBus_inv_ready) top->dBus_inv_valid = 0; + if(top->dBus_inv_valid == 0 && VL_RANDOM_I(7) < 5){ + top->dBus_inv_valid = 1; + top->dBus_inv_payload_fragment_enable = VL_RANDOM_I(7) < 100; + if(!invalidationHint.empty()){ + top->dBus_inv_payload_fragment_address = invalidationHint.front(); + invalidationHint.pop(); + } else { + top->dBus_inv_payload_fragment_address = VL_RANDOM_I(32); + } + } + } + top->dBus_ack_ready = (ws->dStall ? VL_RANDOM_I(7) < 100 : 1); + if(top->dBus_sync_ready) top->dBus_sync_valid = 0; + if(top->dBus_sync_valid == 0 && pendingSync != 0 && (ws->dStall ? VL_RANDOM_I(7) < 80 : 1) ){ + top->dBus_sync_valid = 1; + } + #endif - top->dBus_cmd_ready = (ws->dStall ? VL_RANDOM_I(7) < 100 : 1) && (pendingCount == 0 || wr); } }; #endif @@ -3335,7 +3465,6 @@ public: } }; - class LinuxRegression: public LinuxSoc{ public: string pendingLine = ""; @@ -3368,6 +3497,82 @@ public: #endif +#ifdef LINUX_SOC_SMP + +class LinuxSocSmp : public Workspace{ +public: + queue customCin; + void pushCin(string m){ + for(char& c : m) { + customCin.push(c); + } + } + + LinuxSocSmp(string name) : Workspace(name) { + #ifdef WITH_USER_IO + stdinNonBuffered(); + captureCtrlC(); + #endif + stdoutNonBuffered(); + } + + virtual ~LinuxSocSmp(){ + #ifdef WITH_USER_IO + stdinRestore(); + #endif + } + virtual bool isDBusCheckedRegion(uint32_t address){ return true;} + virtual bool isPerifRegion(uint32_t addr) { return (addr & 0xF0000000) == 0xF0000000;} + virtual bool isMmuRegion(uint32_t addr) { return true; } + + + + virtual void dBusAccess(uint32_t addr,bool wr, uint32_t size,uint32_t mask, uint32_t *data, bool *error) { + if(isPerifRegion(addr)) switch(addr){ + //TODO Emulate peripherals here + case 0xF0010000: if(wr && *data != 0) fail(); else *data = 0; break; + case 0xF001BFF8: if(wr) fail(); else *data = mTime; break; + case 0xF001BFFC: if(wr) fail(); else *data = mTime >> 32; break; + 
case 0xF0014000: if(wr) mTimeCmp = (mTimeCmp & 0xFFFFFFFF00000000) | *data; else fail(); break; + case 0xF0014004: if(wr) mTimeCmp = (mTimeCmp & 0x00000000FFFFFFFF) | (((uint64_t)*data) << 32); else fail(); break; + case 0xF0000000: + if(wr){ + char c = (char)*data; + cout << c; + logTraces << c; + logTraces.flush(); + onStdout(c); + } + case 0xF0000004: + if(!wr){ + #ifdef WITH_USER_IO + if(stdinNonEmpty()){ + char c; + read(0, &c, 1); + *data = c; + } else + #endif + if(!customCin.empty()){ + *data = customCin.front(); + customCin.pop(); + } else { + *data = -1; + } + } + break; + default: cout << "Unmapped peripheral access : addr=0x" << hex << addr << " wr=" << wr << " mask=0x" << mask << " data=0x" << data << dec << endl; fail(); break; + } + + Workspace::dBusAccess(addr,wr,size,mask,data,error); + } + + virtual void onStdout(char c){ + + } +}; + +#endif + string riscvTestMain[] = { //"rv32ui-p-simple", "rv32ui-p-lui", @@ -3724,6 +3929,27 @@ int main(int argc, char **argv, char **env) { #endif +#ifdef LINUX_SOC_SMP + { + + LinuxSocSmp soc("linuxSmp"); + #ifndef DEBUG_PLUGIN_EXTERNAL + soc.withRiscvRef(); + soc.loadBin(EMULATOR, 0x80000000); + soc.loadBin(VMLINUX, 0xC0000000); + soc.loadBin(DTB, 0xC4000000); + soc.loadBin(RAMDISK, 0xC2000000); + #endif + //soc.setIStall(true); + //soc.setDStall(true); + soc.bootAt(0x80000000); + soc.run(0); +// soc.run((496300000l + 2000000) / 2); +// soc.run(438700000l/2); + return -1; + } +#endif + diff --git a/src/test/cpp/regression/makefile b/src/test/cpp/regression/makefile index 61fe9d1..c7dcf5f 100644 --- a/src/test/cpp/regression/makefile +++ b/src/test/cpp/regression/makefile @@ -3,10 +3,13 @@ REGRESSION_PATH?=./ VEXRISCV_FILE?=../../../../VexRiscv.v IBUS?=CACHED IBUS_TC?=no +IBUS_DATA_WIDTH?=32 DBUS?=CACHED +DBUS_DATA_WIDTH?=32 TRACE?=no TRACE_ACCESS?=no TRACE_START=0 +TRACE_SPORADIC?=no ISA_TEST?=yes MUL?=yes DIV?=yes @@ -15,6 +18,8 @@ CSR_SKIP_TEST?=no EBREAK?=no FENCEI?=no MMU?=yes +DBUS_EXCLUSIVE?=no 
+DBUS_INVALIDATE?=no SEED?=no LRSC?=no AMO?=no @@ -39,11 +44,18 @@ STOP_ON_ERROR?=no COREMARK=no WITH_USER_IO?=no + ADDCFLAGS += -CFLAGS -DREGRESSION_PATH='\"$(REGRESSION_PATH)/\"' ADDCFLAGS += -CFLAGS -DIBUS_${IBUS} +ADDCFLAGS += -CFLAGS -DIBUS_DATA_WIDTH=${IBUS_DATA_WIDTH} +ADDCFLAGS += -CFLAGS -DDBUS_DATA_WIDTH=${DBUS_DATA_WIDTH} + ADDCFLAGS += -CFLAGS -DDBUS_${DBUS} ADDCFLAGS += -CFLAGS -DREDO=${REDO} ADDCFLAGS += -CFLAGS -pthread +ADDCFLAGS += -CFLAGS -Wno-unused-result + + ADDCFLAGS += -CFLAGS -DTHREAD_COUNT=${THREAD_COUNT} @@ -73,6 +85,15 @@ ifeq ($(LINUX_SOC),yes) ADDCFLAGS += -CFLAGS -DEMULATOR='\"$(EMULATOR)\"' endif +ifeq ($(LINUX_SOC_SMP),yes) + ADDCFLAGS += -CFLAGS -DLINUX_SOC_SMP + ADDCFLAGS += -CFLAGS -DVMLINUX='\"$(VMLINUX)\"' + ADDCFLAGS += -CFLAGS -DDTB='\"$(DTB)\"' + ADDCFLAGS += -CFLAGS -DRAMDISK='\"$(RAMDISK)\"' + ADDCFLAGS += -CFLAGS -DEMULATOR='\"$(EMULATOR)\"' +endif + + ARCH_LINUX=rv32i ifeq ($(MUL),yes) ifeq ($(DIV),yes) @@ -176,6 +197,12 @@ ifeq ($(TRACE),yes) ADDCFLAGS += -CFLAGS -DTRACE endif +ifeq ($(TRACE_SPORADIC),yes) + ADDCFLAGS += -CFLAGS -DTRACE_SPORADIC +endif + + + ifeq ($(CSR),yes) ADDCFLAGS += -CFLAGS -DCSR endif @@ -217,6 +244,13 @@ ifeq ($(MMU),yes) ADDCFLAGS += -CFLAGS -DMMU endif +ifeq ($(DBUS_EXCLUSIVE),yes) + ADDCFLAGS += -CFLAGS -DDBUS_EXCLUSIVE +endif +ifeq ($(DBUS_INVALIDATE),yes) + ADDCFLAGS += -CFLAGS -DDBUS_INVALIDATE +endif + ifeq ($(MUL),yes) ADDCFLAGS += -CFLAGS -DMUL endif @@ -278,5 +312,4 @@ compile: verilate clean: rm -rf obj_dir - rm -f VexRiscv.v*.bin diff --git a/src/test/scala/vexriscv/DhrystoneBench.scala b/src/test/scala/vexriscv/DhrystoneBench.scala index d23c4e1..39c434a 100644 --- a/src/test/scala/vexriscv/DhrystoneBench.scala +++ b/src/test/scala/vexriscv/DhrystoneBench.scala @@ -2,7 +2,8 @@ package vexriscv import java.io.File -import org.scalatest.{FunSuite} +import org.scalatest.FunSuite +import spinal.core.SpinalVerilog import vexriscv.demo._ import scala.sys.process._ @@ -42,6 +43,43 @@ class 
DhrystoneBench extends FunSuite{ } + getDmips( + name = "GenTwoStageArty", + gen = SpinalVerilog(GenTwoStage.cpu( + withMulDiv = false, + bypass = false, + barrielShifter = false + )), + testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes" + ) + getDmips( + name = "GenTwoStageBarrielArty", + gen = SpinalVerilog(GenTwoStage.cpu( + withMulDiv = false, + bypass = true, + barrielShifter = true + )), + testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes" + ) + getDmips( + name = "GenTwoStageMDArty", + gen = SpinalVerilog(GenTwoStage.cpu( + withMulDiv = true, + bypass = false, + barrielShifter = false + )), + testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=yes DIV=yes COREMARK=yes" + ) + getDmips( + name = "GenTwoStageMDBarrielArty", + gen = SpinalVerilog(GenTwoStage.cpu( + withMulDiv = true, + bypass = true, + barrielShifter = true + )), + testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=yes DIV=yes COREMARK=yes" + ) + getDmips( name = "GenSmallestNoCsr", gen = GenSmallestNoCsr.main(null), @@ -104,7 +142,7 @@ class DhrystoneBench extends FunSuite{ gen = LinuxGen.main(Array.fill[String](0)("")), testCmd = "make clean run IBUS=CACHED DBUS=CACHED DEBUG_PLUGIN=STD DHRYSTONE=yes SUPERVISOR=yes MMU=no CSR=yes CSR_SKIP_TEST=yes COMPRESSED=no MUL=yes DIV=yes LRSC=yes AMO=yes REDO=10 TRACE=no COREMARK=yes LINUX_REGRESSION=no" ) - //make run IBUS=CACHED DBUS=CACHED DEBUG_PLUGIN=STD DHRYSTONE=yess SUPERVISOR=yes CSR=yes COMPRESSED=no MUL=yes DIV=yes LRSC=yes AMO=yes REDO=1 TRACE=no LINUX_REGRESSION=yes SEED=42 +// //make run IBUS=CACHED DBUS=CACHED DEBUG_PLUGIN=STD DHRYSTONE=yess SUPERVISOR=yes CSR=yes COMPRESSED=no MUL=yes DIV=yes LRSC=yes AMO=yes REDO=1 TRACE=no LINUX_REGRESSION=yes SEED=42 test("final_report") { diff --git 
a/src/test/scala/vexriscv/TestIndividualFeatures.scala b/src/test/scala/vexriscv/TestIndividualFeatures.scala index bbf84a4..f3f5b28 100644 --- a/src/test/scala/vexriscv/TestIndividualFeatures.scala +++ b/src/test/scala/vexriscv/TestIndividualFeatures.scala @@ -1,7 +1,7 @@ package vexriscv import java.io.{File, OutputStream} -import java.util.concurrent.TimeUnit +import java.util.concurrent.{ForkJoinPool, TimeUnit} import org.apache.commons.io.FileUtils import org.scalatest.{BeforeAndAfterAll, FunSuite, ParallelTestExecution, Tag, Transformer} @@ -321,9 +321,12 @@ class IBusDimension(rvcRate : Double) extends VexRiscvDimension("IBus") { override def randomPositionImpl(universes: Seq[ConfigUniverse], r: Random) = { val catchAll = universes.contains(VexRiscvUniverse.CATCH_ALL) - val mmuConfig = if(universes.contains(VexRiscvUniverse.MMU)) MmuPortConfig( portTlbSize = 4) else null + val noMemory = universes.contains(VexRiscvUniverse.NO_MEMORY) + val noWriteBack = universes.contains(VexRiscvUniverse.NO_WRITEBACK) + if(r.nextDouble() < 0.5){ + val mmuConfig = if(universes.contains(VexRiscvUniverse.MMU)) MmuPortConfig( portTlbSize = 4) else null val latency = r.nextInt(5) + 1 val compressed = r.nextDouble() < rvcRate val injectorStage = r.nextBoolean() || latency == 1 @@ -347,14 +350,20 @@ class IBusDimension(rvcRate : Double) extends VexRiscvDimension("IBus") { override def instructionAnticipatedOk() = injectorStage } } else { + val twoStageMmu = r.nextBoolean() + val mmuConfig = if(universes.contains(VexRiscvUniverse.MMU)) MmuPortConfig(portTlbSize = 4, latency = if(twoStageMmu) 1 else 0, earlyRequireMmuLockup = Random.nextBoolean() && twoStageMmu, earlyCacheHits = Random.nextBoolean() && twoStageMmu) else null + val catchAll = universes.contains(VexRiscvUniverse.CATCH_ALL) val compressed = r.nextDouble() < rvcRate val tighlyCoupled = r.nextBoolean() && !catchAll + val reducedBankWidth = r.nextBoolean() // val tighlyCoupled = false val prediction = random(r, List(NONE, 
STATIC, DYNAMIC, DYNAMIC_TARGET)) val relaxedPcCalculation, twoCycleCache, injectorStage = r.nextBoolean() val twoCycleRam = r.nextBoolean() && twoCycleCache - val bytePerLine = List(8,16,32,64)(r.nextInt(4)) + val twoCycleRamInnerMux = r.nextBoolean() && twoCycleRam + val memDataWidth = List(32,64,128)(r.nextInt(3)) + val bytePerLine = Math.max(memDataWidth/8, List(8,16,32,64)(r.nextInt(4))) var cacheSize = 0 var wayCount = 0 do{ @@ -362,8 +371,8 @@ class IBusDimension(rvcRate : Double) extends VexRiscvDimension("IBus") { wayCount = 1 << r.nextInt(3) }while(cacheSize/wayCount < 512 || (catchAll && cacheSize/wayCount > 4096)) - new VexRiscvPosition("Cached" + (if(twoCycleCache) "2cc" else "") + (if(injectorStage) "Injstage" else "") + (if(twoCycleRam) "2cr" else "") + "S" + cacheSize + "W" + wayCount + "BPL" + bytePerLine + (if(relaxedPcCalculation) "Relax" else "") + (if(compressed) "Rvc" else "") + prediction.getClass.getTypeName().replace("$","")+ (if(tighlyCoupled)"Tc" else "")) with InstructionAnticipatedPosition{ - override def testParam = "IBUS=CACHED" + (if(compressed) " COMPRESSED=yes" else "") + (if(tighlyCoupled)" IBUS_TC=yes" else "") + new VexRiscvPosition(s"Cached${memDataWidth}d" + (if(twoCycleCache) "2cc" else "") + (if(injectorStage) "Injstage" else "") + (if(twoCycleRam) "2cr" else "") + "S" + cacheSize + "W" + wayCount + "BPL" + bytePerLine + (if(relaxedPcCalculation) "Relax" else "") + (if(compressed) "Rvc" else "") + prediction.getClass.getTypeName().replace("$","")+ (if(tighlyCoupled)"Tc" else "")) with InstructionAnticipatedPosition{ + override def testParam = s"IBUS=CACHED IBUS_DATA_WIDTH=$memDataWidth" + (if(compressed) " COMPRESSED=yes" else "") + (if(tighlyCoupled)" IBUS_TC=yes" else "") override def applyOn(config: VexRiscvConfig): Unit = { val p = new IBusCachedPlugin( resetVector = 0x80000000l, @@ -378,12 +387,14 @@ class IBusDimension(rvcRate : Double) extends VexRiscvDimension("IBus") { wayCount = wayCount, addressWidth = 32, 
cpuDataWidth = 32, - memDataWidth = 32, + memDataWidth = memDataWidth, catchIllegalAccess = catchAll, catchAccessFault = catchAll, asyncTagMemory = false, twoCycleRam = twoCycleRam, - twoCycleCache = twoCycleCache + twoCycleCache = twoCycleCache, + twoCycleRamInnerMux = twoCycleRamInnerMux, + reducedBankWidth = reducedBankWidth ) ) if(tighlyCoupled) p.newTightlyCoupledPort(TightlyCoupledPortParameter("iBusTc", a => a(30 downto 28) === 0x0)) @@ -402,13 +413,12 @@ class DBusDimension extends VexRiscvDimension("DBus") { override def randomPositionImpl(universes: Seq[ConfigUniverse], r: Random) = { val catchAll = universes.contains(VexRiscvUniverse.CATCH_ALL) - val mmuConfig = if(universes.contains(VexRiscvUniverse.MMU)) MmuPortConfig( portTlbSize = 4) else null val noMemory = universes.contains(VexRiscvUniverse.NO_MEMORY) val noWriteBack = universes.contains(VexRiscvUniverse.NO_WRITEBACK) - if(r.nextDouble() < 0.4 || noMemory){ + val mmuConfig = if(universes.contains(VexRiscvUniverse.MMU)) MmuPortConfig( portTlbSize = 4, latency = 0) else null val withLrSc = catchAll val earlyInjection = r.nextBoolean() && !universes.contains(VexRiscvUniverse.NO_WRITEBACK) new VexRiscvPosition("Simple" + (if(earlyInjection) "Early" else "Late")) { @@ -423,21 +433,27 @@ class DBusDimension extends VexRiscvDimension("DBus") { // override def isCompatibleWith(positions: Seq[ConfigPosition[VexRiscvConfig]]) = catchAll == positions.exists(_.isInstanceOf[CatchAllPosition]) } } else { - val bytePerLine = List(8,16,32,64)(r.nextInt(4)) + val twoStageMmu = r.nextBoolean() && !noMemory && !noWriteBack + val mmuConfig = if(universes.contains(VexRiscvUniverse.MMU)) MmuPortConfig(portTlbSize = 4, latency = if(twoStageMmu) 1 else 0, earlyRequireMmuLockup = Random.nextBoolean() && twoStageMmu, earlyCacheHits = Random.nextBoolean() && twoStageMmu) else null + val memDataWidth = List(32,64,128)(r.nextInt(3)) + val bytePerLine = Math.max(memDataWidth/8, List(8,16,32,64)(r.nextInt(4))) var cacheSize = 0 
var wayCount = 0 val withLrSc = catchAll - val withAmo = catchAll && r.nextBoolean() - val dBusRspSlavePipe, relaxedMemoryTranslationRegister = r.nextBoolean() + val withSmp = withLrSc && r.nextBoolean() + val withAmo = catchAll && r.nextBoolean() || withSmp + val dBusRspSlavePipe = r.nextBoolean() || withSmp + val relaxedMemoryTranslationRegister = r.nextBoolean() val earlyWaysHits = r.nextBoolean() && !noWriteBack + val directTlbHit = r.nextBoolean() && mmuConfig.isInstanceOf[MmuPortConfig] val dBusCmdMasterPipe, dBusCmdSlavePipe = false //As it create test bench issues do{ cacheSize = 512 << r.nextInt(5) wayCount = 1 << r.nextInt(3) }while(cacheSize/wayCount < 512 || (catchAll && cacheSize/wayCount > 4096)) - new VexRiscvPosition("Cached" + "S" + cacheSize + "W" + wayCount + "BPL" + bytePerLine + (if(dBusCmdMasterPipe) "Cmp " else "") + (if(dBusCmdSlavePipe) "Csp " else "") + (if(dBusRspSlavePipe) "Rsp " else "") + (if(relaxedMemoryTranslationRegister) "Rmtr " else "") + (if(earlyWaysHits) "Ewh " else "")) { - override def testParam = "DBUS=CACHED " + (if(withLrSc) "LRSC=yes " else "") + (if(withAmo) "AMO=yes " else "") + new VexRiscvPosition(s"Cached${memDataWidth}d" + "S" + cacheSize + "W" + wayCount + "BPL" + bytePerLine + (if(dBusCmdMasterPipe) "Cmp " else "") + (if(dBusCmdSlavePipe) "Csp " else "") + (if(dBusRspSlavePipe) "Rsp " else "") + (if(relaxedMemoryTranslationRegister) "Rmtr " else "") + (if(earlyWaysHits) "Ewh " else "") + (if(withAmo) "Amo " else "") + (if(withSmp) "Smp " else "") + (if(directTlbHit) "Dtlb " else "") + (if(twoStageMmu) "Tsmmu " else "")) { + override def testParam = s"DBUS=CACHED DBUS_DATA_WIDTH=$memDataWidth " + (if(withLrSc) "LRSC=yes " else "") + (if(withAmo) "AMO=yes " else "") + (if(withSmp) "DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes " else "") override def applyOn(config: VexRiscvConfig): Unit = { config.plugins += new DBusCachedPlugin( @@ -447,13 +463,16 @@ class DBusDimension extends VexRiscvDimension("DBus") { wayCount = 
wayCount, addressWidth = 32, cpuDataWidth = 32, - memDataWidth = 32, + memDataWidth = memDataWidth, catchAccessError = catchAll, catchIllegal = catchAll, catchUnaligned = catchAll, withLrSc = withLrSc, withAmo = withAmo, - earlyWaysHits = earlyWaysHits + earlyWaysHits = earlyWaysHits, + withExclusive = withSmp, + withInvalidate = withSmp, + directTlbHit = directTlbHit ), dBusCmdMasterPipe = dBusCmdMasterPipe, dBusCmdSlavePipe = dBusCmdSlavePipe, @@ -567,8 +586,13 @@ object PlayFuture extends App{ Thread.sleep(8000) } -class MultithreadedFunSuite extends FunSuite { - implicit val ec = ExecutionContext.global +class MultithreadedFunSuite(threadCount : Int) extends FunSuite { + val finalThreadCount = if(threadCount > 0) threadCount else { + new oshi.SystemInfo().getHardware.getProcessor.getLogicalProcessorCount + } + implicit val ec = ExecutionContext.fromExecutorService( + new ForkJoinPool(finalThreadCount, ForkJoinPool.defaultForkJoinWorkerThreadFactory, null, true) + ) class Job(body : => Unit){ val originalOutput = Console.out val buffer = mutable.Queue[Char]() @@ -605,7 +629,7 @@ class MultithreadedFunSuite extends FunSuite { } -class FunTestPara extends MultithreadedFunSuite{ +class FunTestPara extends MultithreadedFunSuite(3){ def createTest(name : String): Unit ={ test(name){ for(i <- 0 to 4) { @@ -617,20 +641,20 @@ class FunTestPara extends MultithreadedFunSuite{ (0 to 80).map(_.toString).foreach(createTest) } -class FunTestPlay extends FunSuite { - def createTest(name : String): Unit ={ - test(name){ - Thread.sleep(500) - for(i <- 0 to 4) { - println(s"$name $i") - Thread.sleep(500) - } - } - } - (0 to 80).map(_.toString).foreach(createTest) -} +//class FunTestPlay extends FunSuite { +// def createTest(name : String): Unit ={ +// test(name){ +// Thread.sleep(500) +// for(i <- 0 to 4) { +// println(s"$name $i") +// Thread.sleep(500) +// } +// } +// } +// (0 to 80).map(_.toString).foreach(createTest) +//} -class TestIndividualFeatures extends 
MultithreadedFunSuite { +class TestIndividualFeatures extends MultithreadedFunSuite(sys.env.getOrElse("VEXRISCV_REGRESSION_THREAD_COUNT", "0").toInt) { val testCount = sys.env.getOrElse("VEXRISCV_REGRESSION_CONFIG_COUNT", "100").toInt val seed = sys.env.getOrElse("VEXRISCV_REGRESSION_SEED", Random.nextLong().toString).toLong val testId : Set[Int] = sys.env.get("VEXRISCV_REGRESSION_TEST_ID") match { @@ -716,7 +740,7 @@ class TestIndividualFeatures extends MultithreadedFunSuite { //Test RTL val debug = true - val stdCmd = (s"make run REGRESSION_PATH=../../src/test/cpp/regression VEXRISCV_FILE=VexRiscv.v WITH_USER_IO=no REDO=10 TRACE=${if(debug) "yes" else "no"} TRACE_START=1000000000000l FLOW_INFO=no STOP_ON_ERROR=no DHRYSTONE=yes COREMARK=${coremarkRegression} THREAD_COUNT=1 ") + s" SEED=${testSeed} " + val stdCmd = (s"make run REGRESSION_PATH=../../src/test/cpp/regression VEXRISCV_FILE=VexRiscv.v WITH_USER_IO=no REDO=10 TRACE=${if(debug) "yes" else "no"} TRACE_START=100000000000ll FLOW_INFO=no STOP_ON_ERROR=no DHRYSTONE=yes COREMARK=${coremarkRegression} THREAD_COUNT=1 ") + s" SEED=${testSeed} " val testCmd = stdCmd + (positionsToApply).map(_.testParam).mkString(" ") println(testCmd) val str = doCmd(testCmd)