From e3b9e671ec4c7f0e15a499938218f56f6beef078 Mon Sep 17 00:00:00 2001 From: Charles Papon Date: Sat, 8 Apr 2017 17:42:13 +0200 Subject: [PATCH] IBusCachedPlugin add two stage cache option for better FMax and better scaling --- .../Plugin/DecoderSimplePlugin.scala | 2 +- .../SpinalRiscv/Plugin/IBusCachedPlugin.scala | 142 +++++++++++++++--- src/main/scala/SpinalRiscv/TopLevel.scala | 53 ++++--- src/test/cpp/testA/fail.gtkw | 71 ++++++--- src/test/cpp/testA/main.cpp | 2 +- 5 files changed, 202 insertions(+), 68 deletions(-) diff --git a/src/main/scala/SpinalRiscv/Plugin/DecoderSimplePlugin.scala b/src/main/scala/SpinalRiscv/Plugin/DecoderSimplePlugin.scala index 08af71c..9b69e27 100644 --- a/src/main/scala/SpinalRiscv/Plugin/DecoderSimplePlugin.scala +++ b/src/main/scala/SpinalRiscv/Plugin/DecoderSimplePlugin.scala @@ -131,7 +131,7 @@ class DecoderSimplePlugin(catchIllegalInstruction : Boolean) extends Plugin[VexR if(catchIllegalInstruction){ - decodeExceptionPort.valid := arbitration.isValid && !input(LEGAL_INSTRUCTION) + decodeExceptionPort.valid := arbitration.isValid && !arbitration.haltIt && !input(LEGAL_INSTRUCTION) //Only raise the illegal-instruction exception while decode is not halted, so the stage can wait for valid data from the two-stage cache decodeExceptionPort.code := 2 decodeExceptionPort.badAddr.assignDontCare() } diff --git a/src/main/scala/SpinalRiscv/Plugin/IBusCachedPlugin.scala b/src/main/scala/SpinalRiscv/Plugin/IBusCachedPlugin.scala index 7763575..87fc59b 100644 --- a/src/main/scala/SpinalRiscv/Plugin/IBusCachedPlugin.scala +++ b/src/main/scala/SpinalRiscv/Plugin/IBusCachedPlugin.scala @@ -13,7 +13,8 @@ case class InstructionCacheConfig( cacheSize : Int, cpuDataWidth : Int, memDataWidth : Int, catchAccessFault : Boolean, - asyncTagMemory : Boolean){ + asyncTagMemory : Boolean, + twoStageLogic : Boolean){ def burstSize = bytePerLine*8/memDataWidth } @@ -52,15 +53,24 @@ class IBusCachedPlugin(config : InstructionCacheConfig) extends Plugin[VexRiscv] //Connect fetch cache side cache.io.cpu.fetch.isValid 
:= fetch.arbitration.isValid cache.io.cpu.fetch.isStuck := fetch.arbitration.isStuck + if(!twoStageLogic) cache.io.cpu.fetch.isStuckByOthers := fetch.arbitration.isStuckByOthers cache.io.cpu.fetch.address := fetch.output(PC) - fetch.arbitration.haltIt setWhen(cache.io.cpu.fetch.haltIt) - fetch.insert(INSTRUCTION) := cache.io.cpu.fetch.data + if(!twoStageLogic) fetch.arbitration.haltIt setWhen(cache.io.cpu.fetch.haltIt) + if(!twoStageLogic) fetch.insert(INSTRUCTION) := cache.io.cpu.fetch.data cache.io.flush.cmd.valid := False + if(twoStageLogic){ + cache.io.cpu.decode.isValid := decode.arbitration.isValid + decode.arbitration.haltIt.setWhen(cache.io.cpu.decode.haltIt) + cache.io.cpu.decode.isStuck := decode.arbitration.isStuck + cache.io.cpu.decode.address := decode.input(PC) + decode.insert(INSTRUCTION) := cache.io.cpu.decode.data + } + if(catchAccessFault){ - fetch.insert(IBUS_ACCESS_ERROR) := cache.io.cpu.fetch.error + if(!twoStageLogic) fetch.insert(IBUS_ACCESS_ERROR) := cache.io.cpu.fetch.error decodeExceptionPort.valid := decode.arbitration.isValid && decode.input(IBUS_ACCESS_ERROR) decodeExceptionPort.code := 1 @@ -71,7 +81,7 @@ class IBusCachedPlugin(config : InstructionCacheConfig) extends Plugin[VexRiscv] -case class InstructionCacheCpuCmd(p : InstructionCacheConfig) extends Bundle with IMasterSlave{ +case class InstructionCacheCpuPrefetch(p : InstructionCacheConfig) extends Bundle with IMasterSlave{ val isValid = Bool val isFiring = Bool val haltIt = Bool @@ -83,7 +93,24 @@ case class InstructionCacheCpuCmd(p : InstructionCacheConfig) extends Bundle wit } } -case class InstructionCacheCpuRsp(p : InstructionCacheConfig) extends Bundle with IMasterSlave { +case class InstructionCacheCpuFetch(p : InstructionCacheConfig) extends Bundle with IMasterSlave { + val isValid = Bool + val haltIt = if(!p.twoStageLogic) Bool else null + val isStuck = Bool + val isStuckByOthers = if(!p.twoStageLogic) Bool else null + val address = UInt(p.addressWidth bit) + val data = 
if(!p.twoStageLogic) Bits(32 bit) else null + val error = if(!p.twoStageLogic && p.catchAccessFault) Bool else null + + override def asMaster(): Unit = { + out(isValid, isStuck, address) + outWithNull(isStuckByOthers) + inWithNull(error,data,haltIt) + } +} + +case class InstructionCacheCpuDecode(p : InstructionCacheConfig) extends Bundle with IMasterSlave { + require(p.twoStageLogic) val isValid = Bool val haltIt = Bool val isStuck = Bool @@ -98,14 +125,15 @@ case class InstructionCacheCpuRsp(p : InstructionCacheConfig) extends Bundle wit } } - case class InstructionCacheCpuBus(p : InstructionCacheConfig) extends Bundle with IMasterSlave{ - val prefetch = InstructionCacheCpuCmd(p) - val fetch = InstructionCacheCpuRsp(p) + val prefetch = InstructionCacheCpuPrefetch(p) + val fetch = InstructionCacheCpuFetch(p) + val decode = if(p.twoStageLogic) InstructionCacheCpuDecode(p) else null override def asMaster(): Unit = { master(prefetch) master(fetch) + if(p.twoStageLogic) master(decode) } } @@ -181,10 +209,6 @@ class InstructionCache(p : InstructionCacheConfig) extends Component{ val address = UInt(tagRange.length bit) } -// class LineWord extends Bundle{ -// val data = Bits(wordWidth bits) -// val error = Bool -// } val ways = Array.fill(wayCount)(new Area{ val tags = Mem(new LineInfo(),wayLineCount) @@ -247,10 +271,14 @@ class InstructionCache(p : InstructionCacheConfig) extends Component{ val loadedWords = RegNext(loadedWordsNext) val loadedWordsReadable = RegNext(loadedWords) loadedWordsNext := loadedWords + + val waysWritePort = ways(0).datas.writePort //Not multi ways + waysWritePort.valid := io.mem.rsp.valid + waysWritePort.address := request.addr(lineRange) @@ wordIndex + waysWritePort.data := io.mem.rsp.data when(io.mem.rsp.valid){ wordIndex := wordIndex + 1 loadedWordsNext(wordIndex) := True - ways(0).datas(request.addr(lineRange) @@ wordIndex) := io.mem.rsp.data //TODO if(catchAccessFault) loadingWithError setWhen io.mem.rsp.error } @@ -278,7 +306,7 @@ class 
InstructionCache(p : InstructionCacheConfig) extends Component{ } } - val task = new Area{ + val task = if(!twoStageLogic) new Area{ val waysHitValid = False val waysHitError = Bool.assignDontCare() val waysHitWord = Bits(wordWidth bit) @@ -310,13 +338,93 @@ class InstructionCache(p : InstructionCacheConfig) extends Component{ io.cpu.fetch.haltIt := io.cpu.fetch.isValid && !(waysHitValid || (loaderHitValid && loaderHitReady)) io.cpu.fetch.data := waysHitWord //TODO if(catchAccessFault) io.cpu.fetch.error := (waysHitValid && waysHitError) || (loaderHitValid && loaderHitReady && lineLoader.loadingWithErrorReg) - lineLoader.requestIn.valid := io.cpu.fetch.isValid && ! waysHitValid + lineLoader.requestIn.valid := io.cpu.fetch.isValid && !io.cpu.fetch.isStuckByOthers && !waysHitValid lineLoader.requestIn.addr := io.cpu.fetch.address + } else new Area{ + + val waysHitValid = False + val waysHitError = Bool.assignDontCare() + val waysHitWord = Bits(wordWidth bit) + + val waysRead = for(way <- ways) yield new Area{ + val tag = if(asyncTagMemory) + way.tags.readAsync(io.cpu.fetch.address(lineRange)) + else + way.tags.readSync(io.cpu.prefetch.address(lineRange),enable = !io.cpu.fetch.isStuck) + + val data = way.datas.readSync(io.cpu.prefetch.address(lineRange.high downto wordRange.low),enable = !io.cpu.fetch.isStuck) + waysHitWord := data //Not applicable to multi way + when(tag.valid && tag.address === io.cpu.fetch.address(tagRange)) { + waysHitValid := True + if(catchAccessFault) waysHitError := tag.error + } + + when(lineLoader.request.valid && lineLoader.request.addr(lineRange) === io.cpu.fetch.address(lineRange)){ + waysHitValid := False //Not applicable to multi way + } + } + + + + val loadedWord = new Area{ + val valid = RegNext(lineLoader.waysWritePort.valid) + val address = RegNext(lineLoader.request.addr(tagLineRange) @@ lineLoader.wordIndex @@ U"00") + val data = RegNext(lineLoader.waysWritePort.data) + } + + + val fetchInstructionValid = Bool + val 
fetchInstructionValue = Bits(32 bits) + val fetchInstructionValidReg = Reg(Bool) + val fetchInstructionValueReg = Reg(Bits(32 bits)) + + when(fetchInstructionValidReg){ + fetchInstructionValid := True + fetchInstructionValue := fetchInstructionValueReg + }.elsewhen(loadedWord.valid && (loadedWord.address >> 2) === (io.cpu.fetch.address >> 2)){ + fetchInstructionValid := True + fetchInstructionValue := loadedWord.data + } otherwise{ + fetchInstructionValid := waysHitValid + fetchInstructionValue := waysHitWord + } + + + when(io.cpu.fetch.isStuck){ + fetchInstructionValidReg := fetchInstructionValid + fetchInstructionValueReg := fetchInstructionValue + } otherwise { + fetchInstructionValidReg := False + } + + + val decodeInstructionValid = Reg(Bool) + val decodeInstructionReg = Reg(Bits(32 bits)) + + when(!io.cpu.decode.isStuck){ + decodeInstructionValid := fetchInstructionValid + decodeInstructionReg := fetchInstructionValue + }.elsewhen(loadedWord.valid && (loadedWord.address >> 2) === (io.cpu.decode.address >> 2)){ + decodeInstructionValid := True + decodeInstructionReg := loadedWord.data + } + + io.cpu.decode.haltIt := io.cpu.decode.isValid && !decodeInstructionValid + io.cpu.decode.data := decodeInstructionReg + + lineLoader.requestIn.valid := io.cpu.decode.isValid && !decodeInstructionValid + lineLoader.requestIn.addr := io.cpu.decode.address + } io.flush.cmd.ready := !(lineLoader.request.valid || io.cpu.fetch.isValid) } -// + + + + + + //object InstructionCacheMain{ // // def main(args: Array[String]) { diff --git a/src/main/scala/SpinalRiscv/TopLevel.scala b/src/main/scala/SpinalRiscv/TopLevel.scala index 41f8f3b..6c38fe7 100644 --- a/src/main/scala/SpinalRiscv/TopLevel.scala +++ b/src/main/scala/SpinalRiscv/TopLevel.scala @@ -200,23 +200,24 @@ object TopLevel { configTest.plugins ++= List( new PcManagerSimplePlugin(0x00000000l, true), - new IBusSimplePlugin( - interfaceKeepData = true, - catchAccessFault = false - ), -// new IBusCachedPlugin( -// config = 
InstructionCacheConfig( -// cacheSize = 4096, -// bytePerLine =32, -// wayCount = 1, -// wrappedMemAccess = true, -// addressWidth = 32, -// cpuDataWidth = 32, -// memDataWidth = 32, -// catchAccessFault = false, -// asyncTagMemory = false -// ) +// new IBusSimplePlugin( +// interfaceKeepData = true, +// catchAccessFault = false // ), + new IBusCachedPlugin( + config = InstructionCacheConfig( + cacheSize = 4096, + bytePerLine =32, + wayCount = 1, + wrappedMemAccess = true, + addressWidth = 32, + cpuDataWidth = 32, + memDataWidth = 32, + catchAccessFault = false, + asyncTagMemory = false, + twoStageLogic = true + ) + ), new DBusSimplePlugin( catchAddressMisaligned = false, @@ -238,22 +239,22 @@ object TopLevel { catchIllegalInstruction = false ), new RegFilePlugin( - regFileReadyKind = Plugin.SYNC, + regFileReadyKind = Plugin.ASYNC, zeroBoot = false ), new IntAluPlugin, new SrcPlugin( - separatedAddSub = true + separatedAddSub = false ), new FullBarrielShifterPlugin, // new LightShifterPlugin, // new HazardSimplePlugin(true, true, true, true), // new HazardSimplePlugin(false, true, false, true), new HazardSimplePlugin( - bypassExecute = true, - bypassMemory = true, - bypassWriteBack = true, - bypassWriteBackBuffer = true, + bypassExecute = false, + bypassMemory = false, + bypassWriteBack = false, + bypassWriteBackBuffer = false, pessimisticUseSrc = false, pessimisticWriteRegFile = false, pessimisticAddressMatch = false @@ -268,12 +269,13 @@ object TopLevel { ) ) - val toplevel = new VexRiscv(configFull) +// val toplevel = new VexRiscv(configFull) // val toplevel = new VexRiscv(configLight) -// val toplevel = new VexRiscv(configTest) + val toplevel = new VexRiscv(configTest) toplevel.decode.input(toplevel.config.INSTRUCTION).addAttribute(Verilator.public) toplevel.decode.input(toplevel.config.PC).addAttribute(Verilator.public) toplevel.decode.arbitration.isValid.addAttribute(Verilator.public) + toplevel.decode.arbitration.haltIt.addAttribute(Verilator.public) // 
toplevel.writeBack.input(config.PC).addAttribute(Verilator.public) // toplevel.service(classOf[DecoderSimplePlugin]).bench(toplevel) @@ -285,4 +287,5 @@ object TopLevel { //TODO DivPlugin should not used MixedDivider (double twoComplement) //TODO DivPlugin should register the twoComplement output before pipeline insertion //TODO MulPlugin doesn't fit well on Artix (FMAX) -//TODO PcReg design is unoptimized by Artix synthesis \ No newline at end of file +//TODO PcReg design is unoptimized by Artix synthesis +//TODO FMAX SRC mux + bypass mux priority \ No newline at end of file diff --git a/src/test/cpp/testA/fail.gtkw b/src/test/cpp/testA/fail.gtkw index f0e5318..d97694c 100644 --- a/src/test/cpp/testA/fail.gtkw +++ b/src/test/cpp/testA/fail.gtkw @@ -1,42 +1,65 @@ [*] [*] GTKWave Analyzer v3.3.58 (w)1999-2014 BSI -[*] Sat Apr 1 15:43:19 2017 +[*] Sat Apr 8 15:08:01 2017 [*] -[dumpfile] "/home/spinalvm/Spinal/VexRiscv/src/test/cpp/testA/dhrystoneO3.vcd" -[dumpfile_mtime] "Sat Apr 1 15:42:10 2017" -[dumpfile_size] 214475745 +[dumpfile] "/home/spinalvm/Spinal/VexRiscv/src/test/cpp/testA/rv32ui-p-simple.vcd" +[dumpfile_mtime] "Sat Apr 8 15:02:54 2017" +[dumpfile_size] 95378 [savefile] "/home/spinalvm/Spinal/VexRiscv/src/test/cpp/testA/fail.gtkw" -[timestart] 0 +[timestart] 211 [size] 1776 953 [pos] -1 -1 -*-16.000000 553 48755 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +*-4.422177 320 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 [treeopen] TOP. [treeopen] TOP.VexRiscv. 
-[sst_width] 313 -[signals_width] 558 +[sst_width] 201 +[signals_width] 397 [sst_expanded] 1 -[sst_vpaned_height] 593 +[sst_vpaned_height] 279 +@800200 +-prefetch @28 -TOP.clk -TOP.reset +TOP.VexRiscv.instructionCache_1.io_cpu_prefetch_haltIt @22 -TOP.VexRiscv.dataCache_1.io_mem_rsp_payload_data[31:0] +TOP.VexRiscv.instructionCache_1.io_cpu_prefetch_address[31:0] +@1000200 +-prefetch +@800200 +-fetch @28 -TOP.VexRiscv.dataCache_1.io_mem_rsp_valid +TOP.VexRiscv.instructionCache_1.io_cpu_fetch_isValid +TOP.VexRiscv.instructionCache_1.io_cpu_fetch_isStuck @22 -TOP.VexRiscv.dataCache_1.io_cpu_writeBack_data[31:0] +TOP.VexRiscv.instructionCache_1.io_cpu_fetch_address[31:0] +@1000200 +-fetch +@800200 +-decode @28 -TOP.VexRiscv.writeBack_MEMORY_ENABLE -TOP.VexRiscv.writeBack_arbitration_isFiring -TOP.VexRiscv.dataCache_1.ways_0_data_port0_enable -@22 -TOP.VexRiscv.dataCache_1.ways_0_data_port0_data[31:0] -@28 -TOP.VexRiscv.dataCache_1.manager_cpuRspIn_ready +TOP.VexRiscv.instructionCache_1.io_cpu_decode_isValid @29 -TOP.VexRiscv.dataCache_1.manager_cpuRspIn_valid +TOP.VexRiscv.instructionCache_1.io_cpu_decode_haltIt @28 -TOP.VexRiscv.dataCache_1.manager_cpuRsp_ready -TOP.VexRiscv.dataCache_1.manager_cpuRsp_valid +TOP.VexRiscv.instructionCache_1.io_cpu_decode_isStuck +@22 +TOP.VexRiscv.instructionCache_1.io_cpu_decode_address[31:0] +TOP.VexRiscv.instructionCache_1.io_cpu_decode_instruction[31:0] +@1000200 +-decode +@800200 +-ibus +@22 +TOP.VexRiscv.instructionCache_1.io_mem_cmd_payload_address[31:0] +@28 +TOP.VexRiscv.instructionCache_1.io_mem_cmd_ready +TOP.VexRiscv.instructionCache_1.io_mem_cmd_valid +@22 +TOP.VexRiscv.instructionCache_1.io_mem_rsp_payload_data[31:0] +@28 +TOP.VexRiscv.instructionCache_1.io_mem_rsp_valid +@1000200 +-ibus +@28 +TOP.VexRiscv.instructionCache_1.clk [pattern_trace] 1 [pattern_trace] 0 diff --git a/src/test/cpp/testA/main.cpp b/src/test/cpp/testA/main.cpp index daab8e9..45ce64d 100644 --- a/src/test/cpp/testA/main.cpp +++ 
b/src/test/cpp/testA/main.cpp @@ -340,7 +340,7 @@ public: for(SimElement* simElement : simElements) simElement->preCycle(); - if(top->VexRiscv->decode_arbitration_isValid){ + if(top->VexRiscv->decode_arbitration_isValid && !top->VexRiscv->decode_arbitration_haltIt){ uint32_t expectedData; bool dummy; iBusAccess(top->VexRiscv->decode_PC, &expectedData, &dummy);