From e8aa828744ef013c1e01faf44ecc89995bbc5e3c Mon Sep 17 00:00:00 2001 From: Charles Papon Date: Sat, 29 Jul 2017 21:36:30 +0200 Subject: [PATCH] PcPlugin change fastPcCalculation into relaxedPcCalculation relaxedPcCalculation relaxes timings on the IBusSimple address => better FMax when the CPU is integrated into a SoC --- README.md | 2 +- src/main/scala/vexriscv/TestsWorkspace.scala | 105 +++++++++--------- src/main/scala/vexriscv/demo/GenFull.scala | 2 +- .../scala/vexriscv/demo/GenFullNoMmu.scala | 2 +- .../vexriscv/demo/GenFullNoMmuNoCache.scala | 2 +- .../vexriscv/demo/GenSmallAndPerformant.scala | 2 +- .../scala/vexriscv/demo/GenSmallest.scala | 2 +- .../vexriscv/demo/GenSmallestNoCsr.scala | 2 +- src/main/scala/vexriscv/demo/Murax.scala | 20 ++-- .../vexriscv/plugin/IBusSimplePlugin.scala | 2 +- .../plugin/PcManagerSimplePlugin.scala | 60 ++++++++-- src/test/cpp/murax/main.cpp | 4 +- 12 files changed, 124 insertions(+), 81 deletions(-) diff --git a/README.md b/README.md index 73ed997..9f009c7 100644 --- a/README.md +++ b/README.md @@ -262,7 +262,7 @@ val cpu = new VexRiscv( plugins = List( new PcManagerSimplePlugin( resetVector = 0x00000000l, - fastPcCalculation = true + relaxedPcCalculation = true ), new IBusSimplePlugin( interfaceKeepData = false, diff --git a/src/main/scala/vexriscv/TestsWorkspace.scala b/src/main/scala/vexriscv/TestsWorkspace.scala index cd020e1..90407dd 100644 --- a/src/main/scala/vexriscv/TestsWorkspace.scala +++ b/src/main/scala/vexriscv/TestsWorkspace.scala @@ -31,62 +31,65 @@ object TestsWorkspace { SpinalVerilog { val configFull = VexRiscvConfig( plugins = List( - new PcManagerSimplePlugin(0x00000000l, false), -// new IBusSimplePlugin( -// interfaceKeepData = false, -// catchAccessFault = true -// ), - new IBusCachedPlugin( - config = InstructionCacheConfig( - cacheSize = 4096, - bytePerLine =32, - wayCount = 1, - wrappedMemAccess = true, - addressWidth = 32, - cpuDataWidth = 32, - memDataWidth = 32, - catchIllegalAccess = true, - 
catchAccessFault = true, - catchMemoryTranslationMiss = true, - asyncTagMemory = false, - twoStageLogic = true - ), - askMemoryTranslation = true, - memoryTranslatorPortConfig = MemoryTranslatorPortConfig( - portTlbSize = 4 - ) + new PcManagerSimplePlugin( + resetVector = 0x00000000l, + relaxedPcCalculation = true ), -// new DBusSimplePlugin( -// catchAddressMisaligned = true, -// catchAccessFault = true, -// earlyInjection = false -// ), - new DBusCachedPlugin( - config = new DataCacheConfig( - cacheSize = 4096, - bytePerLine = 32, - wayCount = 1, - addressWidth = 32, - cpuDataWidth = 32, - memDataWidth = 32, - catchAccessError = true, - catchIllegal = true, - catchUnaligned = true, - catchMemoryTranslationMiss = true - ), -// memoryTranslatorPortConfig = null - memoryTranslatorPortConfig = MemoryTranslatorPortConfig( - portTlbSize = 6 - ) + new IBusSimplePlugin( + interfaceKeepData = false, + catchAccessFault = true ), +// new IBusCachedPlugin( +// config = InstructionCacheConfig( +// cacheSize = 4096, +// bytePerLine =32, +// wayCount = 1, +// wrappedMemAccess = true, +// addressWidth = 32, +// cpuDataWidth = 32, +// memDataWidth = 32, +// catchIllegalAccess = true, +// catchAccessFault = true, +// catchMemoryTranslationMiss = true, +// asyncTagMemory = false, +// twoStageLogic = true +// ), +// askMemoryTranslation = true, +// memoryTranslatorPortConfig = MemoryTranslatorPortConfig( +// portTlbSize = 4 +// ) +// ), + new DBusSimplePlugin( + catchAddressMisaligned = true, + catchAccessFault = true, + earlyInjection = false + ), +// new DBusCachedPlugin( +// config = new DataCacheConfig( +// cacheSize = 4096, +// bytePerLine = 32, +// wayCount = 1, +// addressWidth = 32, +// cpuDataWidth = 32, +// memDataWidth = 32, +// catchAccessError = true, +// catchIllegal = true, +// catchUnaligned = true, +// catchMemoryTranslationMiss = true +// ), +//// memoryTranslatorPortConfig = null +// memoryTranslatorPortConfig = MemoryTranslatorPortConfig( +// portTlbSize = 6 +// 
) +// ), // new StaticMemoryTranslatorPlugin( // ioRange = _(31 downto 28) === 0xF // ), - new MemoryTranslatorPlugin( - tlbSize = 32, - virtualRange = _(31 downto 28) === 0xC, - ioRange = _(31 downto 28) === 0xF - ), +// new MemoryTranslatorPlugin( +// tlbSize = 32, +// virtualRange = _(31 downto 28) === 0xC, +// ioRange = _(31 downto 28) === 0xF +// ), new DecoderSimplePlugin( catchIllegalInstruction = true ), diff --git a/src/main/scala/vexriscv/demo/GenFull.scala b/src/main/scala/vexriscv/demo/GenFull.scala index 16c8259..1d83903 100644 --- a/src/main/scala/vexriscv/demo/GenFull.scala +++ b/src/main/scala/vexriscv/demo/GenFull.scala @@ -14,7 +14,7 @@ object GenFull extends App{ plugins = List( new PcManagerSimplePlugin( resetVector = 0x00000000l, - fastPcCalculation = false + relaxedPcCalculation = false ), new IBusCachedPlugin( config = InstructionCacheConfig( diff --git a/src/main/scala/vexriscv/demo/GenFullNoMmu.scala b/src/main/scala/vexriscv/demo/GenFullNoMmu.scala index c9980a4..aee85d6 100644 --- a/src/main/scala/vexriscv/demo/GenFullNoMmu.scala +++ b/src/main/scala/vexriscv/demo/GenFullNoMmu.scala @@ -14,7 +14,7 @@ object GenFullNoMmu extends App{ plugins = List( new PcManagerSimplePlugin( resetVector = 0x00000000l, - fastPcCalculation = false + relaxedPcCalculation = false ), new IBusCachedPlugin( config = InstructionCacheConfig( diff --git a/src/main/scala/vexriscv/demo/GenFullNoMmuNoCache.scala b/src/main/scala/vexriscv/demo/GenFullNoMmuNoCache.scala index 46c8ae6..d3a5945 100644 --- a/src/main/scala/vexriscv/demo/GenFullNoMmuNoCache.scala +++ b/src/main/scala/vexriscv/demo/GenFullNoMmuNoCache.scala @@ -14,7 +14,7 @@ object GenFullNoMmuNoCache extends App{ plugins = List( new PcManagerSimplePlugin( resetVector = 0x00000000l, - fastPcCalculation = false + relaxedPcCalculation = false ), new IBusSimplePlugin( interfaceKeepData = false, diff --git a/src/main/scala/vexriscv/demo/GenSmallAndPerformant.scala 
b/src/main/scala/vexriscv/demo/GenSmallAndPerformant.scala index 29c6ccc..d5aeb08 100644 --- a/src/main/scala/vexriscv/demo/GenSmallAndPerformant.scala +++ b/src/main/scala/vexriscv/demo/GenSmallAndPerformant.scala @@ -13,7 +13,7 @@ object GenSmallAndProductive extends App{ plugins = List( new PcManagerSimplePlugin( resetVector = 0x00000000l, - fastPcCalculation = false + relaxedPcCalculation = false ), new IBusSimplePlugin( interfaceKeepData = false, diff --git a/src/main/scala/vexriscv/demo/GenSmallest.scala b/src/main/scala/vexriscv/demo/GenSmallest.scala index 801ccd7..92e6bb7 100644 --- a/src/main/scala/vexriscv/demo/GenSmallest.scala +++ b/src/main/scala/vexriscv/demo/GenSmallest.scala @@ -13,7 +13,7 @@ object GenSmallest extends App{ plugins = List( new PcManagerSimplePlugin( resetVector = 0x00000000l, - fastPcCalculation = false + relaxedPcCalculation = false ), new IBusSimplePlugin( interfaceKeepData = false, diff --git a/src/main/scala/vexriscv/demo/GenSmallestNoCsr.scala b/src/main/scala/vexriscv/demo/GenSmallestNoCsr.scala index 5d21111..bbc8d27 100644 --- a/src/main/scala/vexriscv/demo/GenSmallestNoCsr.scala +++ b/src/main/scala/vexriscv/demo/GenSmallestNoCsr.scala @@ -13,7 +13,7 @@ object GenSmallestNoCsr extends App{ plugins = List( new PcManagerSimplePlugin( resetVector = 0x00000000l, - fastPcCalculation = false + relaxedPcCalculation = false ), new IBusSimplePlugin( interfaceKeepData = false, diff --git a/src/main/scala/vexriscv/demo/Murax.scala b/src/main/scala/vexriscv/demo/Murax.scala index 83096e2..a362e60 100644 --- a/src/main/scala/vexriscv/demo/Murax.scala +++ b/src/main/scala/vexriscv/demo/Murax.scala @@ -16,7 +16,8 @@ import vexriscv.{plugin, VexRiscvConfig, VexRiscv} * Created by PIC32F_USER on 28/07/2017. * * Murax is a very light SoC which could work without any external component. 
- * Tested on ICE40-hx8k device, 60 Mhz, 2150 LC + * - ICE40-hx8k + icestorm => 53 MHz, 2142 LC + * - 0.37 DMIPS/MHz * - 8 kB of on-chip ram * - JTAG debugger (eclipse/GDB/openocd ready) * - Interrupt support @@ -36,11 +37,11 @@ case class MuraxConfig(coreFrequency : HertzNumber, object MuraxConfig{ def default = MuraxConfig( - coreFrequency = 12 MHz, - onChipRamSize = 8 kB, - pipelineDBus = false, - pipelineMainBus = true, - pipelineApbBridge = false + coreFrequency = 12 MHz, + onChipRamSize = 8 kB, + pipelineDBus = true, + pipelineMainBus = false, + pipelineApbBridge = true ) } @@ -130,7 +131,7 @@ case class Murax(config : MuraxConfig) extends Component{ plugins = List( new PcManagerSimplePlugin( resetVector = 0x00000000l, - fastPcCalculation = false + relaxedPcCalculation = true ), new IBusSimplePlugin( interfaceKeepData = false, @@ -138,7 +139,8 @@ case class Murax(config : MuraxConfig) extends Component{ ), new DBusSimplePlugin( catchAddressMisaligned = false, - catchAccessFault = false + catchAccessFault = false, + earlyInjection = false ), new CsrPlugin(CsrPluginConfig.smallest), new DecoderSimplePlugin( @@ -186,7 +188,7 @@ case class Murax(config : MuraxConfig) extends Component{ dBus = plugin.dBus else { dBus = cloneOf(plugin.dBus) - dBus.cmd <-< plugin.dBus.cmd + dBus.cmd << plugin.dBus.cmd.halfPipe() dBus.rsp <> plugin.dBus.rsp } } diff --git a/src/main/scala/vexriscv/plugin/IBusSimplePlugin.scala b/src/main/scala/vexriscv/plugin/IBusSimplePlugin.scala index 1e89719..292720e 100644 --- a/src/main/scala/vexriscv/plugin/IBusSimplePlugin.scala +++ b/src/main/scala/vexriscv/plugin/IBusSimplePlugin.scala @@ -115,7 +115,7 @@ class IBusSimplePlugin(interfaceKeepData : Boolean, catchAccessFault : Boolean) val pendingCmd = RegInit(False) clearWhen (iBus.rsp.ready) setWhen (iBus.cmd.fire) //Emit iBus.cmd request - iBus.cmd.valid := prefetch.arbitration.isValid && !prefetch.arbitration.isStuckByOthers && !(pendingCmd && !iBus.rsp.ready) 
//prefetch.arbitration.isValid && !prefetch.arbitration.isStuckByOthers + iBus.cmd.valid := prefetch.arbitration.isValid && !prefetch.arbitration.removeIt && !prefetch.arbitration.isStuckByOthers && !(pendingCmd && !iBus.rsp.ready) //prefetch.arbitration.isValid && !prefetch.arbitration.isStuckByOthers iBus.cmd.pc := prefetch.output(PC) prefetch.arbitration.haltIt setWhen (!iBus.cmd.ready || (pendingCmd && !iBus.rsp.ready)) } diff --git a/src/main/scala/vexriscv/plugin/PcManagerSimplePlugin.scala b/src/main/scala/vexriscv/plugin/PcManagerSimplePlugin.scala index 5b2f40f..3ec06bb 100644 --- a/src/main/scala/vexriscv/plugin/PcManagerSimplePlugin.scala +++ b/src/main/scala/vexriscv/plugin/PcManagerSimplePlugin.scala @@ -7,7 +7,7 @@ import spinal.lib._ import scala.collection.mutable.ArrayBuffer class PcManagerSimplePlugin(resetVector : BigInt, - fastPcCalculation : Boolean = false) extends Plugin[VexRiscv] with JumpService{ + relaxedPcCalculation : Boolean = false) extends Plugin[VexRiscv] with JumpService{ //FetchService interface case class JumpInfo(interface : Flow[UInt], stage: Stage) val jumpInfos = ArrayBuffer[JumpInfo]() @@ -19,11 +19,56 @@ class PcManagerSimplePlugin(resetVector : BigInt, var prefetchExceptionPort : Flow[ExceptionCause] = null override def setup(pipeline: VexRiscv): Unit = { - pipeline.unremovableStages += pipeline.prefetch + if(!relaxedPcCalculation) pipeline.unremovableStages += pipeline.prefetch } override def build(pipeline: VexRiscv): Unit = { + if(relaxedPcCalculation) + relaxedImpl(pipeline) + else + cycleEffectiveImpl(pipeline) + } + + //reduce combinatorial path, and expose the PC to the pipeline as a register + def relaxedImpl(pipeline: VexRiscv): Unit = { + import pipeline.config._ + import pipeline.prefetch + + prefetch plug new Area { + import prefetch._ + //Stage always valid + arbitration.isValid := True + + //PC calculation without Jump + val pcReg = Reg(UInt(32 bits)) init(resetVector) addAttribute(Verilator.public) + 
when(arbitration.isFiring){ + pcReg := pcReg + 4 + } + + //JumpService hardware implementation + val jump = if(jumpInfos.length != 0) new Area { + val sortedByStage = jumpInfos.sortWith((a, b) => pipeline.indexOf(a.stage) > pipeline.indexOf(b.stage)) + val valids = sortedByStage.map(_.interface.valid) + val pcs = sortedByStage.map(_.interface.payload) + + val pcLoad = Flow(UInt(32 bits)) + pcLoad.valid := jumpInfos.map(_.interface.valid).orR + pcLoad.payload := MuxOH(OHMasking.first(valids.asBits), pcs) + + //application of the selected jump request + when(pcLoad.valid) { + pcReg := pcLoad.payload + } + } + + insert(PC_CALC_WITHOUT_JUMP) := pcReg + insert(PC) := pcReg + } + } + + //Jump take effect instantly (save one cycle), but expose the PC to the pipeline as a 'long' combinatorial path + def cycleEffectiveImpl(pipeline: VexRiscv): Unit = { import pipeline.config._ import pipeline.prefetch @@ -35,19 +80,12 @@ class PcManagerSimplePlugin(resetVector : BigInt, //PC calculation without Jump val pcReg = Reg(UInt(32 bits)) init(resetVector) addAttribute(Verilator.public) val inc = RegInit(False) - val pcBeforeJumps = if(fastPcCalculation){ - val pcPlus4 = pcReg + U(4) - pcPlus4.addAttribute("keep") - Mux(inc,pcPlus4,pcReg) - }else{ - pcReg + Mux[UInt](inc,4,0) - } - + val pcBeforeJumps = pcReg + (inc ## B"00").asUInt insert(PC_CALC_WITHOUT_JUMP) := pcBeforeJumps val pc = UInt(32 bits) pc := input(PC_CALC_WITHOUT_JUMP) - val samplePcNext = False //TODO FMAX + val samplePcNext = False //JumpService hardware implementation val jump = if(jumpInfos.length != 0) new Area { diff --git a/src/test/cpp/murax/main.cpp b/src/test/cpp/murax/main.cpp index 064d497..94a346e 100644 --- a/src/test/cpp/murax/main.cpp +++ b/src/test/cpp/murax/main.cpp @@ -19,8 +19,8 @@ public: timeProcesses.push_back(jtag); #ifdef TRACE - speedFactor = 10e-3; - cout << "Simulation caped to " << speedFactor << " of real time"<< endl; + //speedFactor = 10e-3; + //cout << "Simulation caped to " << 
speedFactor << " of real time"<< endl; #endif } };