From 506e0e3f60cc89bb666e89bef11be13c3e7c2248 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Fri, 16 Feb 2018 02:21:08 +0100 Subject: [PATCH 1/4] New faster/smaller/multi way instruction cache design. Single or dual stage --- src/main/scala/vexriscv/Pipeline.scala | 1 + src/main/scala/vexriscv/Stage.scala | 2 + src/main/scala/vexriscv/TestsWorkspace.scala | 15 +- src/main/scala/vexriscv/VexRiscv.scala | 1 + .../scala/vexriscv/demo/FormalSimple.scala | 2 +- .../scala/vexriscv/demo/SynthesisBench.scala | 36 +- .../scala/vexriscv/ip/InstructionCache.scala | 365 +++++++----------- .../scala/vexriscv/plugin/DebugPlugin.scala | 52 ++- .../{FomalPlugin.scala => FormalPlugin.scala} | 2 +- .../plugin/HaltOnExceptionPlugin.scala | 8 +- .../vexriscv/plugin/IBusCachedPlugin.scala | 78 ++-- src/test/cpp/regression/main.cpp | 14 +- src/test/cpp/regression/makefile | 1 + src/test/scala/vexriscv/Play.scala | 119 ++++++ 14 files changed, 378 insertions(+), 318 deletions(-) rename src/main/scala/vexriscv/plugin/{FomalPlugin.scala => FormalPlugin.scala} (98%) create mode 100644 src/test/scala/vexriscv/Play.scala diff --git a/src/main/scala/vexriscv/Pipeline.scala b/src/main/scala/vexriscv/Pipeline.scala index 7f555d3..86a2018 100644 --- a/src/main/scala/vexriscv/Pipeline.scala +++ b/src/main/scala/vexriscv/Pipeline.scala @@ -119,6 +119,7 @@ trait Pipeline { for(stageIndex <- 0 until stages.length; stage = stages(stageIndex)){ stage.arbitration.isStuckByOthers := stage.arbitration.haltByOther || stages.takeRight(stages.length - stageIndex - 1).map(s => s.arbitration.haltItself/* && !s.arbitration.removeIt*/).foldLeft(False)(_ || _) stage.arbitration.isStuck := stage.arbitration.haltItself || stage.arbitration.isStuckByOthers + stage.arbitration.isMoving := !stage.arbitration.isStuck && !stage.arbitration.removeIt stage.arbitration.isFiring := stage.arbitration.isValid && !stage.arbitration.isStuck && !stage.arbitration.removeIt } diff --git a/src/main/scala/vexriscv/Stage.scala 
b/src/main/scala/vexriscv/Stage.scala index 57d2a8d..c820754 100644 --- a/src/main/scala/vexriscv/Stage.scala +++ b/src/main/scala/vexriscv/Stage.scala @@ -48,11 +48,13 @@ class Stage() extends Area{ val haltByOther = False //When settable, stuck the instruction, should only be set by something else than the stucked instruction val removeIt = False //When settable, unschedule the instruction as if it was never executed (no side effect) val flushAll = False //When settable, unschedule instructions in the current stage and all prior ones + val redoIt = False //Notify that a given instruction in the pipeline is rescheduled val isValid = RegInit(False) //Inform if a instruction is in the current stage val isStuck = Bool //Inform if the instruction is stuck (haltItself || haltByOther) val isStuckByOthers = Bool //Inform if the instruction is stuck by sombody else def isRemoved = removeIt //Inform if the instruction is going to be unschedule the current cycle val isFlushed = Bool //Inform if the instruction is flushed (flushAll set in the current or subsequents stages) + val isMoving = Bool //Inform if the instruction is going somewhere else (next stage or unscheduled) val isFiring = Bool //Inform if the current instruction will go to the next stage the next cycle (isValid && !isStuck && !removeIt) } diff --git a/src/main/scala/vexriscv/TestsWorkspace.scala b/src/main/scala/vexriscv/TestsWorkspace.scala index 15748d4..9c1b4a4 100644 --- a/src/main/scala/vexriscv/TestsWorkspace.scala +++ b/src/main/scala/vexriscv/TestsWorkspace.scala @@ -41,9 +41,9 @@ object TestsWorkspace { // ), new IBusCachedPlugin( config = InstructionCacheConfig( - cacheSize = 4096*4, - bytePerLine =32, - wayCount = 1, + cacheSize = 4096, + bytePerLine = 32, + wayCount = 4, wrappedMemAccess = true, addressWidth = 32, cpuDataWidth = 32, @@ -52,10 +52,11 @@ object TestsWorkspace { catchAccessFault = true, catchMemoryTranslationMiss = true, asyncTagMemory = false, - twoStageLogic = true + 
twoStageLogic = false, + twoCycleRam = true ), askMemoryTranslation = true, - memoryTranslatorPortConfig = MemoryTranslatorPortConfig( + memoryTranslatorPortConfig = MemoryTranslatorPortConfig( portTlbSize = 4 ) ), @@ -95,7 +96,7 @@ object TestsWorkspace { catchIllegalInstruction = true ), new RegFilePlugin( - regFileReadyKind = plugin.SYNC, + regFileReadyKind = plugin.ASYNC, zeroBoot = false ), new IntAluPlugin, @@ -122,7 +123,7 @@ object TestsWorkspace { new BranchPlugin( earlyBranch = true, catchAddressMisaligned = true, - prediction = DYNAMIC_TARGET, + prediction = NONE, historyRamSizeLog2 = 8 ), new YamlPlugin("cpu0.yaml") diff --git a/src/main/scala/vexriscv/VexRiscv.scala b/src/main/scala/vexriscv/VexRiscv.scala index 606ca3f..44530af 100644 --- a/src/main/scala/vexriscv/VexRiscv.scala +++ b/src/main/scala/vexriscv/VexRiscv.scala @@ -66,6 +66,7 @@ class VexRiscv(val config : VexRiscvConfig) extends Component with Pipeline{ decode.input(config.INSTRUCTION).addAttribute(Verilator.public) decode.input(config.PC).addAttribute(Verilator.public) decode.arbitration.isValid.addAttribute(Verilator.public) + decode.arbitration.flushAll.addAttribute(Verilator.public) decode.arbitration.haltItself.addAttribute(Verilator.public) writeBack.input(config.INSTRUCTION) keep() addAttribute(Verilator.public) writeBack.input(config.PC) keep() addAttribute(Verilator.public) diff --git a/src/main/scala/vexriscv/demo/FormalSimple.scala b/src/main/scala/vexriscv/demo/FormalSimple.scala index 23761ab..0442790 100644 --- a/src/main/scala/vexriscv/demo/FormalSimple.scala +++ b/src/main/scala/vexriscv/demo/FormalSimple.scala @@ -11,7 +11,7 @@ object FormalSimple extends App{ def cpu() = new VexRiscv( config = VexRiscvConfig( plugins = List( - new FomalPlugin, + new FormalPlugin, new HaltOnExceptionPlugin, new PcManagerSimplePlugin( resetVector = 0x00000000l, diff --git a/src/main/scala/vexriscv/demo/SynthesisBench.scala b/src/main/scala/vexriscv/demo/SynthesisBench.scala index 
35b7f12..9175061 100644 --- a/src/main/scala/vexriscv/demo/SynthesisBench.scala +++ b/src/main/scala/vexriscv/demo/SynthesisBench.scala @@ -12,24 +12,24 @@ object VexRiscvSynthesisBench { def main(args: Array[String]) { def wrap(that : => Component) : Component = that - //Wrap with input/output registers - // def wrap(that : => Component) : Component = { - // //new WrapWithReg.Wrapper(that) - // val c = that - // c.rework { - // for (e <- c.getOrdredNodeIo) { - // if (e.isInput) { - // e.asDirectionLess() - // e := RegNext(RegNext(in(cloneOf(e)))) - // - // } else { - // e.asDirectionLess() - // out(cloneOf(e)) := RegNext(RegNext(e)) - // } - // } - // } - // c - // } +// Wrap with input/output registers +// def wrap(that : => Component) : Component = { +// //new WrapWithReg.Wrapper(that) +// val c = that +// c.rework { +// for (e <- c.getOrdredNodeIo) { +// if (e.isInput) { +// e.asDirectionLess() +// e := RegNext(RegNext(in(cloneOf(e)))) +// +// } else { +// e.asDirectionLess() +// out(cloneOf(e)) := RegNext(RegNext(e)) +// } +// } +// } +// c +// } val smallestNoCsr = new Rtl { override def getName(): String = "VexRiscv smallest no CSR" diff --git a/src/main/scala/vexriscv/ip/InstructionCache.scala b/src/main/scala/vexriscv/ip/InstructionCache.scala index eceec6f..87ffec2 100644 --- a/src/main/scala/vexriscv/ip/InstructionCache.scala +++ b/src/main/scala/vexriscv/ip/InstructionCache.scala @@ -18,7 +18,11 @@ case class InstructionCacheConfig( cacheSize : Int, catchAccessFault : Boolean, catchMemoryTranslationMiss : Boolean, asyncTagMemory : Boolean, - twoStageLogic : Boolean){ + twoStageLogic : Boolean, + twoCycleRam : Boolean = false, + preResetFlush : Boolean = false){ + + def dataOnDecode = twoCycleRam && wayCount > 1 def burstSize = bytePerLine*8/memDataWidth def catchSomething = catchAccessFault || catchMemoryTranslationMiss || catchIllegalAccess @@ -47,72 +51,65 @@ case class InstructionCacheConfig( cacheSize : Int, case class InstructionCacheCpuPrefetch(p 
: InstructionCacheConfig) extends Bundle with IMasterSlave{ val isValid = Bool - val isFiring = Bool val haltIt = Bool - val address = UInt(p.addressWidth bit) + val pc = UInt(p.addressWidth bit) override def asMaster(): Unit = { - out(isValid, isFiring, address) + out(isValid, pc) in(haltIt) } } case class InstructionCacheCpuFetch(p : InstructionCacheConfig) extends Bundle with IMasterSlave { val isValid = Bool - val haltIt = if(!p.twoStageLogic) Bool else null val isStuck = Bool - val isStuckByOthers = if(!p.twoStageLogic) Bool else null - val address = UInt(p.addressWidth bit) - val data = if(!p.twoStageLogic) Bits(32 bit) else null - val error = if(!p.twoStageLogic && p.catchAccessFault) Bool else null - val mmuBus = if(p.twoStageLogic) MemoryTranslatorBus() else null + val pc = UInt(p.addressWidth bits) + val data = Bits(p.cpuDataWidth bits) + val mmuBus = MemoryTranslatorBus() override def asMaster(): Unit = { - out(isValid, isStuck, address) - outWithNull(isStuckByOthers) - inWithNull(error,data,haltIt) + out(isValid, isStuck, pc) + inWithNull(data) slaveWithNull(mmuBus) } } + case class InstructionCacheCpuDecode(p : InstructionCacheConfig) extends Bundle with IMasterSlave { - require(p.twoStageLogic) val isValid = Bool - val haltIt = Bool - val isStuck = Bool val isUser = Bool - val address = UInt(p.addressWidth bit) - val data = Bits(32 bit) - val dataAnticipated = Bits(32 bits) + val isStuck = Bool + val pc = UInt(p.addressWidth bits) + val redo = Bool + val data = ifGen(p.dataOnDecode) (Bits(p.cpuDataWidth bits)) val error = if(p.catchAccessFault) Bool else null val mmuMiss = if(p.catchMemoryTranslationMiss) Bool else null val illegalAccess = if(p.catchIllegalAccess) Bool else null override def asMaster(): Unit = { - out(isValid, isStuck, address, isUser) - in(haltIt, data, dataAnticipated) - inWithNull(error,mmuMiss,illegalAccess) + out(isValid, isUser, isStuck, pc) + in(redo) + inWithNull(error,mmuMiss,illegalAccess,data) } } case class 
InstructionCacheCpuBus(p : InstructionCacheConfig) extends Bundle with IMasterSlave{ val prefetch = InstructionCacheCpuPrefetch(p) val fetch = InstructionCacheCpuFetch(p) - val decode = if(p.twoStageLogic) InstructionCacheCpuDecode(p) else null + val decode = InstructionCacheCpuDecode(p) override def asMaster(): Unit = { - master(prefetch) - master(fetch) - if(p.twoStageLogic) master(decode) + master(prefetch, fetch, decode) } } case class InstructionCacheMemCmd(p : InstructionCacheConfig) extends Bundle{ val address = UInt(p.addressWidth bit) + val size = UInt(log2Up(log2Up(p.bytePerLine) + 1) bits) } case class InstructionCacheMemRsp(p : InstructionCacheConfig) extends Bundle{ - val data = Bits(32 bit) + val data = Bits(p.memDataWidth bit) val error = Bool } @@ -173,21 +170,21 @@ case class InstructionCacheFlushBus() extends Bundle with IMasterSlave{ class InstructionCache(p : InstructionCacheConfig) extends Component{ import p._ - assert(wayCount == 1) - assert(cpuDataWidth == memDataWidth) + assert(cpuDataWidth == memDataWidth, "Need testing") val io = new Bundle{ val flush = slave(InstructionCacheFlushBus()) - // val translator = master(InstructionCacheTranslationBus(p)) val cpu = slave(InstructionCacheCpuBus(p)) val mem = master(InstructionCacheMemBus(p)) } - // val haltCpu = False + val lineWidth = bytePerLine*8 val lineCount = cacheSize/bytePerLine val wordWidth = Math.max(memDataWidth,32) val wordWidthLog2 = log2Up(wordWidth) val wordPerLine = lineWidth/wordWidth + val memWordPerLine = lineWidth/memDataWidth val bytePerWord = wordWidth/8 + val bytePerMemWord = memDataWidth/8 val wayLineCount = lineCount/wayCount val wayLineLog2 = log2Up(wayLineCount) val wayWordCount = wayLineCount * wordPerLine @@ -195,44 +192,41 @@ class InstructionCache(p : InstructionCacheConfig) extends Component{ val tagRange = addressWidth-1 downto log2Up(wayLineCount*bytePerLine) val lineRange = tagRange.low-1 downto log2Up(bytePerLine) val wordRange = log2Up(bytePerLine)-1 downto 
log2Up(bytePerWord) + val memWordRange = log2Up(bytePerLine)-1 downto log2Up(bytePerMemWord) + val memWordToCpuWordRange = log2Up(bytePerMemWord)-1 downto log2Up(bytePerWord) val tagLineRange = tagRange.high downto lineRange.low val lineWordRange = lineRange.high downto wordRange.low - class LineInfo extends Bundle{ + case class LineTag() extends Bundle{ val valid = Bool - val loading = Bool - val error = if(catchAccessFault) Bool else null + val error = Bool val address = UInt(tagRange.length bit) } - class LineInfoWithHit extends LineInfo{ - val hit = Bool - } - def LineInfoWithHit(lineInfo : LineInfo, testTag : UInt) = { - val ret = new LineInfoWithHit() - ret.assignSomeByName(lineInfo) - ret.hit := lineInfo.valid && lineInfo.address === testTag - ret - } + val ways = Seq.fill(wayCount)(new Area{ + val tags = Mem(LineTag(),wayLineCount) + val datas = Mem(Bits(memDataWidth bits),wayWordCount) - - val ways = Array.fill(wayCount)(new Area{ - val tags = Mem(new LineInfo(),wayLineCount) - val datas = Mem(Bits(wordWidth bits),wayWordCount) + if(preResetFlush){ + tags.initBigInt(List.fill(wayLineCount)(BigInt(0))) + } }) - io.cpu.prefetch.haltIt := False + + + val lineLoader = new Area{ - val requestIn = Stream(wrap(new Bundle{ - val addr = UInt(addressWidth bits) - })) + val fire = False + val valid = RegInit(False) clearWhen(fire) + val address = Reg(UInt(addressWidth bits)) + val hadError = RegInit(False) clearWhen(fire) + io.cpu.prefetch.haltIt setWhen(valid) - - val flushCounter = Reg(UInt(log2Up(wayLineCount) + 1 bit)) init(0) + val flushCounter = Reg(UInt(log2Up(wayLineCount) + 1 bit)) init(if(preResetFlush) wayLineCount else 0) when(!flushCounter.msb){ io.cpu.prefetch.haltIt := True flushCounter := flushCounter + 1 @@ -241,6 +235,7 @@ class InstructionCache(p : InstructionCacheConfig) extends Component{ io.cpu.prefetch.haltIt := True } val flushFromInterface = RegInit(False) + io.flush.cmd.ready := !(valid || io.cpu.fetch.isValid) //io.cpu.fetch.isValid will 
avoid bug on first cycle miss when(io.flush.cmd.valid){ io.cpu.prefetch.haltIt := True when(io.flush.cmd.ready){ @@ -251,204 +246,110 @@ class InstructionCache(p : InstructionCacheConfig) extends Component{ io.flush.rsp := flushCounter.msb.rise && flushFromInterface - val loadingWithErrorReg = if(catchAccessFault) RegInit(False) else null - val loadingWithError = if(catchAccessFault) Bool else null - if(catchAccessFault) { - loadingWithError := loadingWithErrorReg - loadingWithErrorReg := loadingWithError + + + val cmdSent = RegInit(False) setWhen(io.mem.cmd.fire) clearWhen(fire) + io.mem.cmd.valid := valid && !cmdSent + io.mem.cmd.address := address(tagRange.high downto lineRange.low) @@ U(0,lineRange.low bit) + io.mem.cmd.size := log2Up(p.bytePerLine) + + val wayToAllocate = Counter(wayCount, fire) + val wordIndex = Reg(UInt(log2Up(memWordPerLine) bits)) init(0) + + + val write = new Area{ + val tag = ways.map(_.tags.writePort) + val data = ways.map(_.datas.writePort) } + for(wayId <- 0 until wayCount){ + val wayHit = wayToAllocate === wayId + val tag = write.tag(wayId) + tag.valid := ((wayHit && fire) || !flushCounter.msb) + tag.address := (flushCounter.msb ? 
address(lineRange) | flushCounter(flushCounter.high-1 downto 0)) + tag.data.valid := flushCounter.msb + tag.data.error := hadError || io.mem.rsp.error + tag.data.address := address(tagRange) - - val request = requestIn.stage() - - - //Send memory requests - val memCmdSended = RegInit(False) setWhen(io.mem.cmd.fire) - io.mem.cmd.valid := request.valid && !memCmdSended - if(wrappedMemAccess) - io.mem.cmd.address := request.addr(tagRange.high downto wordRange.low) @@ U(0,wordRange.low bit) - else - io.mem.cmd.address := request.addr(tagRange.high downto lineRange.low) @@ U(0,lineRange.low bit) - - val wordIndex = Reg(UInt(log2Up(wordPerLine) bit)) - val loadedWordsNext = Bits(wordPerLine bit) - val loadedWords = RegNext(loadedWordsNext) - val loadedWordsReadable = RegNext(loadedWords) - loadedWordsNext := loadedWords - - val waysDatasWritePort = ways(0).datas.writePort //Not multi ways - waysDatasWritePort.valid := io.mem.rsp.valid - waysDatasWritePort.address := request.addr(lineRange) @@ wordIndex - waysDatasWritePort.data := io.mem.rsp.data - when(io.mem.rsp.valid){ - wordIndex := wordIndex + 1 - loadedWordsNext(wordIndex) := True - if(catchAccessFault) loadingWithError setWhen io.mem.rsp.error + val data = write.data(wayId) + data.valid := io.mem.rsp.valid && wayHit + data.address := address(lineRange) @@ wordIndex + data.data := io.mem.rsp.data } - val memRspLast = loadedWordsNext === B(loadedWordsNext.range -> true) - - val readyDelay = Reg(UInt(1 bit)) - when(memRspLast){ - readyDelay := readyDelay + 1 - } - request.ready := readyDelay === 1 - - val waysTagsWritePort = ways(0).tags.writePort //not multi way - waysTagsWritePort.valid := io.mem.rsp.valid || !flushCounter.msb - waysTagsWritePort.address := Mux(flushCounter.msb,request.addr(lineRange),flushCounter(flushCounter.high-1 downto 0)) - waysTagsWritePort.data.valid := flushCounter.msb - waysTagsWritePort.data.address := request.addr(tagRange) - waysTagsWritePort.data.loading := !memRspLast - 
if(catchAccessFault) waysTagsWritePort.data.error := loadingWithError - - - when(requestIn.ready){ - memCmdSended := False - wordIndex := requestIn.addr(wordRange) - loadedWords := 0 - loadedWordsReadable := 0 - readyDelay := 0 - if(catchAccessFault) loadingWithErrorReg := False + when(io.mem.rsp.valid) { + wordIndex := (wordIndex + 1).resized + hadError.setWhen(io.mem.rsp.error) + when(wordIndex === wordIndex.maxValue) { + fire := True + } } } - val task = if(!twoStageLogic) new Area{ - val waysHitValid = False - val waysHitError = Bool.assignDontCare() - val waysHitWord = Bits(wordWidth bit) - val waysRead = for(way <- ways) yield new Area{ - val readAddress = Mux(io.cpu.fetch.isStuck,io.cpu.fetch.address,io.cpu.prefetch.address) //TODO FMAX - val tag = if(asyncTagMemory) - way.tags.readAsync(io.cpu.fetch.address(lineRange),writeFirst) - else - way.tags.readSync(readAddress(lineRange),readUnderWrite = readFirst) - - val data = way.datas.readSync(readAddress(lineRange.high downto wordRange.low)) - waysHitWord := data //Not applicable to multi way - when(tag.valid && tag.address === io.cpu.fetch.address(tagRange)) { - waysHitValid := True - if(catchAccessFault) waysHitError := tag.error + val fetchStage = new Area{ + val read = new Area{ + val waysValues = for(way <- ways) yield new Area{ + val tag = if(asyncTagMemory) { + way.tags.readAsync(io.cpu.fetch.pc(lineRange)) + }else { + way.tags.readSync(io.cpu.prefetch.pc(lineRange), !io.cpu.fetch.isStuck) + } + val data = way.datas.readSync(io.cpu.prefetch.pc(lineRange.high downto memWordRange.low), !io.cpu.fetch.isStuck) } } - val hit = waysHitValid && !(waysRead(0).tag.loading && !(if(asyncTagMemory) lineLoader.loadedWords else RegNext(lineLoader.loadedWords))(io.cpu.fetch.address(wordRange))) - io.cpu.fetch.haltIt := io.cpu.fetch.isValid && !hit - io.cpu.fetch.data := waysHitWord - if(catchAccessFault) io.cpu.fetch.error := waysRead(0).tag.error - lineLoader.requestIn.valid := io.cpu.fetch.isValid && !hit //TODO 
avoid duplicated request - lineLoader.requestIn.addr := io.cpu.fetch.address - } else new Area{ - //Long readValidPath - // def writeFirstMemWrap[T <: Data](readValid : Bool, readAddress : UInt, lastAddress : UInt, readData : T,writeValid : Bool, writeAddress : UInt, writeData : T) : T = { - // val hit = writeValid && (readValid ? readAddress | lastAddress) === writeAddress - // val overrideValid = RegInit(False) clearWhen(readValid) setWhen(hit) - // val overrideValue = RegNextWhen(writeData,hit) - // overrideValid ? overrideValue | readData - // } + val hit = if(!twoCycleRam) new Area{ + val hits = read.waysValues.map(way => way.tag.valid && way.tag.address === io.cpu.fetch.mmuBus.rsp.physicalAddress(tagRange)) + val valid = Cat(hits).orR + val id = OHToUInt(hits) + val error = read.waysValues.map(_.tag.error).read(id) + val data = read.waysValues.map(_.data).read(id) + val word = data.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(memWordToCpuWordRange)) + io.cpu.fetch.data := word + } else null - //shot readValid path - def writeFirstMemWrap[T <: Data](readValid : Bool, readLastAddress : UInt, readData : T,writeValid : Bool, writeAddress : UInt, writeData : T) : T = { - val writeSample = readValid || (writeValid && writeAddress === readLastAddress) - val writeValidReg = RegNextWhen(writeValid,writeSample) - val writeAddressReg = RegNextWhen(writeAddress,writeSample) - val writeDataReg = RegNextWhen(writeData,writeSample) - (writeValidReg && writeAddressReg === readLastAddress) ? writeDataReg | readData + if(twoCycleRam && wayCount == 1){ + io.cpu.fetch.data := read.waysValues.head.data.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(memWordToCpuWordRange)) } - //Long sample path - // def writeFirstRegWrap[T <: Data](sample : Bool, sampleAddress : UInt,lastAddress : UInt, readData : T, writeValid : Bool, writeAddress : UInt, writeData : T) : (T,T) = { - // val hit = writeValid && (sample ? 
sampleAddress | lastAddress) === writeAddress - // val bypass = hit ? writeData | readData - // val reg = RegNextWhen(bypass,sample || hit) - // (reg,bypass) - // } - - //Short sample path - def writeFirstRegWrap[T <: Data](sample : Bool, sampleAddress : UInt,sampleLastAddress : UInt, readData : T, writeValid : Bool, writeAddress : UInt, writeData : T) = { - val preWrite = (writeValid && sampleAddress === writeAddress) - val postWrite = (writeValid && sampleLastAddress === writeAddress) - val bypass = (!sample || preWrite) ? writeData | readData - val regEn = sample || postWrite - val reg = RegNextWhen(bypass,regEn) - (reg,bypass,regEn,preWrite,postWrite) - } - // def writeFirstRegWrap[T <: Data](sample : Bool, sampleAddress : UInt,sampleLastAddress : UInt, readData : T, writeValid : Bool, writeAddress : UInt, writeData : T) = { - // val bypass = (!sample || (writeValid && sampleAddress === writeAddress)) ? writeData | readData - // val regEn = sample || (writeValid && sampleLastAddress === writeAddress) - // val reg = RegNextWhen(bypass,regEn) - // (reg,bypass,regEn,False,False) - // } - require(wayCount == 1) - val memRead = new Area{ - val way = ways(0) - val tag = if(asyncTagMemory) - way.tags.readAsync(io.cpu.fetch.address(lineRange),writeFirst) - else - writeFirstMemWrap( - readValid = !io.cpu.fetch.isStuck, - // readAddress = io.cpu.prefetch.address(lineRange), - readLastAddress = io.cpu.fetch.address(lineRange), - readData = way.tags.readSync(io.cpu.prefetch.address(lineRange),enable = !io.cpu.fetch.isStuck), - writeValid = lineLoader.waysTagsWritePort.valid, - writeAddress = lineLoader.waysTagsWritePort.address, - writeData = lineLoader.waysTagsWritePort.data - ) - - val data = writeFirstMemWrap( - readValid = !io.cpu.fetch.isStuck, - // readAddress = io.cpu.prefetch.address(lineWordRange), - readLastAddress = io.cpu.fetch.address(lineWordRange), - readData = way.datas.readSync(io.cpu.prefetch.address(lineWordRange),enable = !io.cpu.fetch.isStuck), - 
writeValid = lineLoader.waysDatasWritePort.valid, - writeAddress = lineLoader.waysDatasWritePort.address, - writeData = lineLoader.waysDatasWritePort.data - ) - } - - - val tag = writeFirstRegWrap( - sample = !io.cpu.decode.isStuck, - sampleAddress = io.cpu.fetch.address(lineRange), - sampleLastAddress = io.cpu.decode.address(lineRange), - readData = LineInfoWithHit(memRead.tag,io.cpu.fetch.address(tagRange)), - writeValid = lineLoader.waysTagsWritePort.valid, - writeAddress = lineLoader.waysTagsWritePort.address, - writeData = LineInfoWithHit(lineLoader.waysTagsWritePort.data,io.cpu.fetch.address(tagRange)) //TODO wrong address src - )._1 - - val (data,dataRegIn,dataRegEn,dataPreWrite,dataPostWrite) = writeFirstRegWrap( - sample = !io.cpu.decode.isStuck, - sampleAddress = io.cpu.fetch.address(lineWordRange), - sampleLastAddress = io.cpu.decode.address(lineWordRange), - readData = memRead.data, - writeValid = lineLoader.waysDatasWritePort.valid, - writeAddress = lineLoader.waysDatasWritePort.address, - writeData = lineLoader.waysDatasWritePort.data - ) - io.cpu.fetch.mmuBus.cmd.isValid := io.cpu.fetch.isValid - io.cpu.fetch.mmuBus.cmd.virtualAddress := io.cpu.fetch.address + io.cpu.fetch.mmuBus.cmd.virtualAddress := io.cpu.fetch.pc io.cpu.fetch.mmuBus.cmd.bypassTranslation := False - val mmuRsp = RegNextWhen(io.cpu.fetch.mmuBus.rsp,!io.cpu.decode.isStuck) - - val hit = tag.valid && tag.address === mmuRsp.physicalAddress(tagRange) && !(tag.loading && !lineLoader.loadedWords(mmuRsp.physicalAddress(wordRange))) - - io.cpu.decode.haltIt := io.cpu.decode.isValid && !hit //TODO PERF not halit it when removed, Should probably be applyed in many other places - io.cpu.decode.data := data - // io.cpu.decode.dataAnticipated := dataRegEn ? dataRegIn | data - io.cpu.decode.dataAnticipated := io.cpu.decode.isStuck ? 
Mux(dataPostWrite,lineLoader.waysDatasWritePort.data,data) | Mux(dataPreWrite,lineLoader.waysDatasWritePort.data,memRead.data) - if(catchAccessFault) io.cpu.decode.error := tag.error - if(catchMemoryTranslationMiss) io.cpu.decode.mmuMiss := mmuRsp.miss - if(catchIllegalAccess) io.cpu.decode.illegalAccess := !mmuRsp.allowExecute || (io.cpu.decode.isUser && !mmuRsp.allowUser) - - lineLoader.requestIn.valid := io.cpu.decode.isValid && !hit && !mmuRsp.miss//TODO avoid duplicated request - lineLoader.requestIn.addr := mmuRsp.physicalAddress } - io.flush.cmd.ready := !(lineLoader.request.valid || io.cpu.fetch.isValid || (if(twoStageLogic) io.cpu.decode.isValid else False)) + + val decodeStage = new Area{ + def stage[T <: Data](that : T) = RegNextWhen(that,!io.cpu.decode.isStuck) + val mmuRsp = stage(io.cpu.fetch.mmuBus.rsp) + + val hit = if(!twoCycleRam) new Area{ + val valid = stage(fetchStage.hit.valid) + val error = stage(fetchStage.hit.error) + } else new Area{ + val tags = fetchStage.read.waysValues.map(way => stage(way.tag)) + val hits = tags.map(tag => tag.valid && tag.address === mmuRsp.physicalAddress(tagRange)) + val valid = Cat(hits).orR + val id = OHToUInt(hits) + val error = tags(id).error + if(dataOnDecode) { + val data = fetchStage.read.waysValues.map(way => stage(way.data)).read(id) + val word = data.subdivideIn(cpuDataWidth bits).read(io.cpu.decode.pc(memWordToCpuWordRange)) + io.cpu.decode.data := word + } + } + + io.cpu.decode.redo := io.cpu.decode.isValid && !hit.valid + when(io.cpu.decode.redo){ + io.cpu.prefetch.haltIt := True + lineLoader.valid := True + lineLoader.address := mmuRsp.physicalAddress //Could be optimised if the MMU is not used + } + + if(catchAccessFault) io.cpu.decode.error := hit.error + if(catchMemoryTranslationMiss) io.cpu.decode.mmuMiss := mmuRsp.miss + if(catchIllegalAccess) io.cpu.decode.illegalAccess := !mmuRsp.allowExecute || (io.cpu.decode.isUser && !mmuRsp.allowUser) + } } diff --git 
a/src/main/scala/vexriscv/plugin/DebugPlugin.scala b/src/main/scala/vexriscv/plugin/DebugPlugin.scala index b73c788..6c83aec 100644 --- a/src/main/scala/vexriscv/plugin/DebugPlugin.scala +++ b/src/main/scala/vexriscv/plugin/DebugPlugin.scala @@ -1,14 +1,16 @@ package vexriscv.plugin import spinal.lib.com.jtag.Jtag -import spinal.lib.system.debugger.{SystemDebugger, JtagBridge, SystemDebuggerConfig} -import vexriscv.plugin.IntAluPlugin.{AluCtrlEnum, ALU_CTRL} +import spinal.lib.system.debugger.{JtagBridge, SystemDebugger, SystemDebuggerConfig} +import vexriscv.plugin.IntAluPlugin.{ALU_CTRL, AluCtrlEnum} import vexriscv._ import vexriscv.ip._ import spinal.core._ import spinal.lib._ -import spinal.lib.bus.amba3.apb.{Apb3Config, Apb3} -import spinal.lib.bus.avalon.{AvalonMMConfig, AvalonMM} +import spinal.lib.bus.amba3.apb.{Apb3, Apb3Config} +import spinal.lib.bus.avalon.{AvalonMM, AvalonMMConfig} + +import scala.collection.mutable.ArrayBuffer case class DebugExtensionCmd() extends Bundle{ @@ -92,10 +94,18 @@ case class DebugExtensionIo() extends Bundle with IMasterSlave{ } } -class DebugPlugin(val debugClockDomain : ClockDomain) extends Plugin[VexRiscv] { + +//Prevents the instruction cache plugin from being confused by new instructions popping into the pipeline +trait InstructionInjector{ + def isInjecting(stage : Stage) : Bool +} + +class DebugPlugin(val debugClockDomain : ClockDomain) extends Plugin[VexRiscv] with InstructionInjector { var io : DebugExtensionIo = null - + val injectionAsks = ArrayBuffer[(Stage, Bool)]() + var isInjectingOnDecode : Bool = null + override def isInjecting(stage: Stage) : Bool = if(stage == pipeline.decode) isInjectingOnDecode else False object IS_EBREAK extends Stageable(Bool) override def setup(pipeline: VexRiscv): Unit = { @@ -114,13 +124,15 @@ class DebugPlugin(val debugClockDomain : ClockDomain) extends Plugin[VexRiscv] { SRC2_CTRL -> Src2CtrlEnum.PC, ALU_CTRL -> AluCtrlEnum.ADD_SUB //Used to get the PC value in busReadDataReg )) + + 
isInjectingOnDecode = Bool() } override def build(pipeline: VexRiscv): Unit = { import pipeline._ import pipeline.config._ - debugClockDomain {pipeline plug new Area{ + val logic = debugClockDomain {pipeline plug new Area{ val insertDecodeInstruction = False val firstCycle = RegNext(False) setWhen (io.bus.cmd.ready) val secondCycle = RegNext(firstCycle) @@ -168,12 +180,21 @@ class DebugPlugin(val debugClockDomain : ClockDomain) extends Plugin[VexRiscv] { } } - //Assign the bus write data into the register who drive the decode instruction, even if it need to cross some hierarchy (caches) Component.current.addPrePopTask(() => { - val reg = decode.input(INSTRUCTION).getDrivingReg - reg.component.rework { - when(insertDecodeInstruction.pull()) { - reg := io.bus.cmd.data.pull() + //Check if the decode instruction is driven by a register + val instructionDriver = try {decode.input(INSTRUCTION).getDrivingReg} catch { case _ : Throwable => null} + if(instructionDriver != null){ //If yes => + //Insert the instruction by writing the "fetch to decode instruction register", + // Works even if it needs to cross some hierarchy (caches) + instructionDriver.component.rework { + when(insertDecodeInstruction.pull()) { + instructionDriver := io.bus.cmd.data.pull() + } + } + } else{ + //Insert the instruction via a mux in the decode stage + when(RegNext(insertDecodeInstruction)){ + decode.input(INSTRUCTION) := RegNext(io.bus.cmd.data) } } }) @@ -193,7 +214,9 @@ class DebugPlugin(val debugClockDomain : ClockDomain) extends Plugin[VexRiscv] { when(stepIt && prefetch.arbitration.isFiring) { haltIt := True } - + when(stepIt && Cat(pipeline.stages.map(_.arbitration.redoIt)).asBits.orR) { + haltIt := False + } io.resetOut := RegNext(resetIt) if(serviceExist(classOf[InterruptionInhibitor])) { @@ -207,5 +230,8 @@ class DebugPlugin(val debugClockDomain : ClockDomain) extends Plugin[VexRiscv] { } } }} + + + isInjectingOnDecode := RegNext(logic.insertDecodeInstruction) init(False) } } diff --git 
a/src/main/scala/vexriscv/plugin/FomalPlugin.scala b/src/main/scala/vexriscv/plugin/FormalPlugin.scala similarity index 98% rename from src/main/scala/vexriscv/plugin/FomalPlugin.scala rename to src/main/scala/vexriscv/plugin/FormalPlugin.scala index e0e27eb..d46feba 100644 --- a/src/main/scala/vexriscv/plugin/FomalPlugin.scala +++ b/src/main/scala/vexriscv/plugin/FormalPlugin.scala @@ -62,7 +62,7 @@ case class RvfiPort() extends Bundle with IMasterSlave { //2) JALR => clear PC(0) //3) input(INSTRUCTION)(5) REGFILE_WRITE_VALID memory read with exception would not fire properly -class FomalPlugin extends Plugin[VexRiscv]{ +class FormalPlugin extends Plugin[VexRiscv]{ var rvfi : RvfiPort = null diff --git a/src/main/scala/vexriscv/plugin/HaltOnExceptionPlugin.scala b/src/main/scala/vexriscv/plugin/HaltOnExceptionPlugin.scala index 44c06a9..e6761de 100644 --- a/src/main/scala/vexriscv/plugin/HaltOnExceptionPlugin.scala +++ b/src/main/scala/vexriscv/plugin/HaltOnExceptionPlugin.scala @@ -28,11 +28,15 @@ class HaltOnExceptionPlugin() extends Plugin[VexRiscv] with ExceptionService { stages.head.insert(FORMAL_HALT) := False stages.foreach(stage => { val stagePorts = exceptionPortsInfos.filter(_.stage == stage) - if(stagePorts.nonEmpty) - when(stagePorts.map(_.port.valid).orR){ + if(stagePorts.nonEmpty) { + when(stagePorts.map(info => info.port.valid).orR) { stage.output(FORMAL_HALT) := True stage.arbitration.haltItself := True } + for(stage <- stages){ + stage.output(FORMAL_HALT) clearWhen(stage.arbitration.isFlushed) + } + } }) } } diff --git a/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala b/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala index 0c67b7a..cfff932 100644 --- a/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala +++ b/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala @@ -8,15 +8,17 @@ import spinal.lib._ class IBusCachedPlugin(config : InstructionCacheConfig, askMemoryTranslation : Boolean = false, memoryTranslatorPortConfig : Any = null) 
extends Plugin[VexRiscv] { import config._ - assert(twoStageLogic || !askMemoryTranslation) var iBus : InstructionCacheMemBus = null var mmuBus : MemoryTranslatorBus = null var decodeExceptionPort : Flow[ExceptionCause] = null var privilegeService : PrivilegeService = null + var redoBranch : Flow[UInt] = null object FLUSH_ALL extends Stageable(Bool) object IBUS_ACCESS_ERROR extends Stageable(Bool) + object IBUS_MMU_MISS extends Stageable(Bool) + object IBUS_ILLEGAL_ACCESS extends Stageable(Bool) override def setup(pipeline: VexRiscv): Unit = { import Riscv._ import pipeline.config._ @@ -29,6 +31,9 @@ class IBusCachedPlugin(config : InstructionCacheConfig, askMemoryTranslation : B FLUSH_ALL -> True )) + //TODO manage priority with branch prediction + redoBranch = pipeline.service(classOf[JumpService]).createJumpInterface(pipeline.decode) + if(catchSomething) { val exceptionService = pipeline.service(classOf[ExceptionService]) decodeExceptionPort = exceptionService.newExceptionPort(pipeline.decode,1) @@ -68,59 +73,55 @@ class IBusCachedPlugin(config : InstructionCacheConfig, askMemoryTranslation : B //Connect prefetch cache side cache.io.cpu.prefetch.isValid := prefetch.arbitration.isValid - cache.io.cpu.prefetch.isFiring := prefetch.arbitration.isFiring - cache.io.cpu.prefetch.address := prefetch.output(PC) + cache.io.cpu.prefetch.pc := prefetch.output(PC) prefetch.arbitration.haltItself setWhen(cache.io.cpu.prefetch.haltIt) //Connect fetch cache side cache.io.cpu.fetch.isValid := fetch.arbitration.isValid cache.io.cpu.fetch.isStuck := fetch.arbitration.isStuck - if(!twoStageLogic) cache.io.cpu.fetch.isStuckByOthers := fetch.arbitration.isStuckByOthers - cache.io.cpu.fetch.address := fetch.output(PC) - if(!twoStageLogic) { - fetch.arbitration.haltItself setWhen (cache.io.cpu.fetch.haltIt) + cache.io.cpu.fetch.pc := fetch.output(PC) + + if (mmuBus != null) { + cache.io.cpu.fetch.mmuBus <> mmuBus + } else { + cache.io.cpu.fetch.mmuBus.rsp.physicalAddress := 
cache.io.cpu.fetch.mmuBus.cmd.virtualAddress + cache.io.cpu.fetch.mmuBus.rsp.allowExecute := True + cache.io.cpu.fetch.mmuBus.rsp.allowRead := True + cache.io.cpu.fetch.mmuBus.rsp.allowWrite := True + cache.io.cpu.fetch.mmuBus.rsp.allowUser := True + cache.io.cpu.fetch.mmuBus.rsp.isIoAccess := False + cache.io.cpu.fetch.mmuBus.rsp.miss := False + } + + if(dataOnDecode){ + decode.insert(INSTRUCTION) := cache.io.cpu.decode.data + }else{ fetch.insert(INSTRUCTION) := cache.io.cpu.fetch.data decode.insert(INSTRUCTION_ANTICIPATED) := Mux(decode.arbitration.isStuck,decode.input(INSTRUCTION),fetch.output(INSTRUCTION)) - decode.insert(INSTRUCTION_READY) := True - }else { - if (mmuBus != null) { - cache.io.cpu.fetch.mmuBus <> mmuBus - } else { - cache.io.cpu.fetch.mmuBus.rsp.physicalAddress := cache.io.cpu.fetch.mmuBus.cmd.virtualAddress - cache.io.cpu.fetch.mmuBus.rsp.allowExecute := True - cache.io.cpu.fetch.mmuBus.rsp.allowRead := True - cache.io.cpu.fetch.mmuBus.rsp.allowWrite := True - cache.io.cpu.fetch.mmuBus.rsp.allowUser := True - cache.io.cpu.fetch.mmuBus.rsp.isIoAccess := False - cache.io.cpu.fetch.mmuBus.rsp.miss := False - } } + decode.insert(INSTRUCTION_READY) := True + cache.io.cpu.decode.pc := decode.output(PC) + val ownDecode = pipeline.plugins.filter(_.isInstanceOf[InstructionInjector]).foldLeft(True)(_ && !_.asInstanceOf[InstructionInjector].isInjecting(decode)) + cache.io.cpu.decode.isValid := decode.arbitration.isValid && ownDecode + cache.io.cpu.decode.isStuck := decode.arbitration.isStuck + cache.io.cpu.decode.isUser := (if(privilegeService != null) privilegeService.isUser(decode) else False) +// cache.io.cpu.decode.pc := decode.input(PC) - if(twoStageLogic){ - cache.io.cpu.decode.isValid := decode.arbitration.isValid && RegNextWhen(fetch.arbitration.isValid, !decode.arbitration.isStuck) //avoid inserted instruction from debug module - decode.arbitration.haltItself.setWhen(cache.io.cpu.decode.haltIt) - cache.io.cpu.decode.isStuck := 
decode.arbitration.isStuck - cache.io.cpu.decode.isUser := (if(privilegeService != null) privilegeService.isUser(writeBack) else False) - cache.io.cpu.decode.address := decode.input(PC) - decode.insert(INSTRUCTION) := cache.io.cpu.decode.data - decode.insert(INSTRUCTION_ANTICIPATED) := cache.io.cpu.decode.dataAnticipated - decode.insert(INSTRUCTION_READY) := !cache.io.cpu.decode.haltIt + redoBranch.valid := cache.io.cpu.decode.redo + redoBranch.payload := decode.input(PC) + when(redoBranch.valid){ + decode.arbitration.redoIt := True + decode.arbitration.flushAll := True } - if(catchSomething){ - if(catchAccessFault) { - if (!twoStageLogic) fetch.insert(IBUS_ACCESS_ERROR) := cache.io.cpu.fetch.error - if (twoStageLogic) decode.insert(IBUS_ACCESS_ERROR) := cache.io.cpu.decode.error - } - - val accessFault = if(catchAccessFault) decode.input(IBUS_ACCESS_ERROR) else False + val accessFault = if(catchAccessFault) cache.io.cpu.decode.error else False val mmuMiss = if(catchMemoryTranslationMiss) cache.io.cpu.decode.mmuMiss else False val illegalAccess = if(catchIllegalAccess) cache.io.cpu.decode.illegalAccess else False - decodeExceptionPort.valid := decode.arbitration.isValid && (accessFault || mmuMiss || illegalAccess) + decodeExceptionPort.valid := decode.arbitration.isValid && ownDecode && (accessFault || mmuMiss || illegalAccess) decodeExceptionPort.code := mmuMiss ? 
U(14) | 1 decodeExceptionPort.badAddr := decode.input(PC) } @@ -130,11 +131,10 @@ class IBusCachedPlugin(config : InstructionCacheConfig, askMemoryTranslation : B cache.io.flush.cmd.valid := False when(arbitration.isValid && input(FLUSH_ALL)){ cache.io.flush.cmd.valid := True + decode.arbitration.flushAll := True when(!cache.io.flush.cmd.ready){ arbitration.haltItself := True - } otherwise { - decode.arbitration.flushAll := True } } } diff --git a/src/test/cpp/regression/main.cpp b/src/test/cpp/regression/main.cpp index 30c93d3..27dda00 100644 --- a/src/test/cpp/regression/main.cpp +++ b/src/test/cpp/regression/main.cpp @@ -358,6 +358,10 @@ public: top->reset = 1; top->eval(); + top->clk = 1; + top->eval(); + top->clk = 0; + top->eval(); #ifdef CSR top->timerInterrupt = 0; top->externalInterrupt = 1; @@ -435,7 +439,7 @@ public: for(SimElement* simElement : simElements) simElement->preCycle(); if(withInstructionReadCheck){ - if(top->VexRiscv->decode_arbitration_isValid && !top->VexRiscv->decode_arbitration_haltItself){ + if(top->VexRiscv->decode_arbitration_isValid && !top->VexRiscv->decode_arbitration_haltItself && !top->VexRiscv->decode_arbitration_flushAll){ uint32_t expectedData; bool dummy; iBusAccess(top->VexRiscv->decode_PC, &expectedData, &dummy); @@ -598,7 +602,7 @@ public: virtual void preCycle(){ if (top->iBus_cmd_valid && top->iBus_cmd_ready && pendingCount == 0) { assertEq(top->iBus_cmd_payload_address & 3,0); - pendingCount = 8; + pendingCount = (1 << top->iBus_cmd_payload_size)/4; address = top->iBus_cmd_payload_address; } } @@ -610,7 +614,7 @@ public: ws->iBusAccess(address,&top->iBus_rsp_payload_data,&error); top->iBus_rsp_payload_error = error; pendingCount--; - address = (address & ~0x1F) + ((address + 4) & 0x1F); + address = address + 4; top->iBus_rsp_valid = 1; } if(ws->iStall) top->iBus_cmd_ready = VL_RANDOM_I(7) < 100 && pendingCount == 0; @@ -1606,7 +1610,7 @@ string riscvTestDiv[] = { }; string freeRtosTests[] = { - "AltBlock", "AltQTest", 
"AltBlckQ", "AltPollQ", "blocktim", "countsem", "dead", "EventGroupsDemo", "flop", "integer", "QPeek", + "AltBlckQ", "AltBlock", "AltQTest", "AltPollQ", "blocktim", "countsem", "dead", "EventGroupsDemo", "flop", "integer", "QPeek", "QueueSet", "recmutex", "semtest", "TaskNotify", "BlockQ", "crhook", "dynamic", "GenQTest", "PollQ", "QueueOverwrite", "QueueSetPolling", "sp_flop", "test1" //"flop", "sp_flop" // <- Simple test @@ -1714,7 +1718,7 @@ int main(int argc, char **argv, char **env) { #ifdef CSR uint32_t machineCsrRef[] = {1,11, 2,0x80000003u, 3,0x80000007u, 4,0x8000000bu, 5,6,7,0x80000007u , 8,6,9,6,10,4,11,4, 12,13,0, 14,2, 15,5,16,17,1 }; - redo(REDO,TestX28("machineCsr",machineCsrRef, sizeof(machineCsrRef)/4).noInstructionReadCheck()->run(4e4);) + redo(REDO,TestX28("machineCsr",machineCsrRef, sizeof(machineCsrRef)/4).noInstructionReadCheck()->run(10e4);) #endif #ifdef MMU uint32_t mmuRef[] = {1,2,3, 0x11111111, 0x11111111, 0x11111111, 0x22222222, 0x22222222, 0x22222222, 4, 0x11111111, 0x33333333, 0x33333333, 5, diff --git a/src/test/cpp/regression/makefile b/src/test/cpp/regression/makefile index 3caee62..0d1be28 100644 --- a/src/test/cpp/regression/makefile +++ b/src/test/cpp/regression/makefile @@ -27,6 +27,7 @@ ADDCFLAGS += -CFLAGS -DREDO=${REDO} ADDCFLAGS += -CFLAGS -pthread ADDCFLAGS += -CFLAGS -DTHREAD_COUNT=${THREAD_COUNT} + ifeq ($(DHRYSTONE),yes) ADDCFLAGS += -CFLAGS -DDHRYSTONE endif diff --git a/src/test/scala/vexriscv/Play.scala b/src/test/scala/vexriscv/Play.scala new file mode 100644 index 0000000..48abd5f --- /dev/null +++ b/src/test/scala/vexriscv/Play.scala @@ -0,0 +1,119 @@ +package vexriscv + +import spinal.core._ +import spinal.lib.master +import vexriscv.ip.InstructionCacheConfig +import vexriscv.plugin._ + +object PlayGen extends App{ + def cpu() = new VexRiscv( + config = VexRiscvConfig( + plugins = List( + new IBusCachedPlugin( + config = InstructionCacheConfig( + cacheSize = 16, + bytePerLine = 4, + wayCount = 1, + wrappedMemAccess 
= false, + addressWidth = 32, + cpuDataWidth = 32, + memDataWidth = 32, + catchIllegalAccess = false, + catchAccessFault = false, + catchMemoryTranslationMiss = false, + asyncTagMemory = false, + twoStageLogic = false, + preResetFlush = false + ), + askMemoryTranslation = false + ), + new FormalPlugin, + new HaltOnExceptionPlugin, + new PcManagerSimplePlugin( + resetVector = 0x00000000l, + relaxedPcCalculation = false + ), +// new IBusSimplePlugin( +// interfaceKeepData = false, +// catchAccessFault = false +// ), + new DBusSimplePlugin( + catchAddressMisaligned = true, + catchAccessFault = false + ), + new DecoderSimplePlugin( + catchIllegalInstruction = true, + forceLegalInstructionComputation = true + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + zeroBoot = false + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false, + executeInsertion = false + ), + new FullBarrielShifterPlugin, + new HazardSimplePlugin( + bypassExecute = false, + bypassMemory = false, + bypassWriteBack = false, + bypassWriteBackBuffer = false, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + new BranchPlugin( + earlyBranch = false, + catchAddressMisaligned = true, + prediction = NONE + ), + new YamlPlugin("cpu0.yaml") + ) + ) + ) + // Wrap with input/output registers + def wrap(that : => VexRiscv) : Component = { + val c = that +// c.rework { +// for (e <- c.getOrdredNodeIo) { +// if (e.isInput) { +// e.asDirectionLess() +// e := RegNext(RegNext(in(cloneOf(e)))) +// +// } else { +// e.asDirectionLess() +// out(cloneOf(e)) := RegNext(RegNext(e)) +// } +// } +// } + + c.rework{ + c.config.plugins.foreach{ + case p : IBusCachedPlugin => { + p.iBus.asDirectionLess().unsetName() + val iBusNew = master(IBusSimpleBus(false)).setName("iBus") + + iBusNew.cmd.valid := p.iBus.cmd.valid + iBusNew.cmd.pc := p.iBus.cmd.address + p.iBus.cmd.ready := iBusNew.cmd.ready + + val pending = RegInit(False) 
clearWhen(iBusNew.rsp.ready) setWhen (iBusNew.cmd.fire) + p.iBus.rsp.valid := iBusNew.rsp.ready & pending + p.iBus.rsp.error := iBusNew.rsp.error + p.iBus.rsp.data := iBusNew.rsp.inst + } + case _ => + } + } + c + } + SpinalConfig( + defaultConfigForClockDomains = ClockDomainConfig( + resetKind = spinal.core.SYNC, + resetActiveLevel = spinal.core.HIGH + ), + inlineRom = true + ).generateVerilog(wrap(cpu())) +} From 93110d3b953ddcc0e5fcc63cb645a34a01a6d57d Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Fri, 16 Feb 2018 14:27:20 +0100 Subject: [PATCH 2/4] Add jump priority managment in PcPlugins --- src/main/scala/vexriscv/Services.scala | 2 +- src/main/scala/vexriscv/TestsWorkspace.scala | 4 ++-- src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala | 2 +- .../scala/vexriscv/plugin/PcManagerSimplePlugin.scala | 11 +++++++---- src/test/cpp/regression/main.cpp | 2 ++ 5 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/main/scala/vexriscv/Services.scala b/src/main/scala/vexriscv/Services.scala index 5d6e371..451fd68 100644 --- a/src/main/scala/vexriscv/Services.scala +++ b/src/main/scala/vexriscv/Services.scala @@ -8,7 +8,7 @@ import spinal.lib._ import scala.beans.BeanProperty trait JumpService{ - def createJumpInterface(stage : Stage) : Flow[UInt] + def createJumpInterface(stage : Stage, priority : Int = 0) : Flow[UInt] } trait DecoderService{ diff --git a/src/main/scala/vexriscv/TestsWorkspace.scala b/src/main/scala/vexriscv/TestsWorkspace.scala index 9c1b4a4..fb2d2d7 100644 --- a/src/main/scala/vexriscv/TestsWorkspace.scala +++ b/src/main/scala/vexriscv/TestsWorkspace.scala @@ -41,9 +41,9 @@ object TestsWorkspace { // ), new IBusCachedPlugin( config = InstructionCacheConfig( - cacheSize = 4096, + cacheSize = 1024, bytePerLine = 32, - wayCount = 4, + wayCount = 2, wrappedMemAccess = true, addressWidth = 32, cpuDataWidth = 32, diff --git a/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala b/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala index 
cfff932..7c8455a 100644 --- a/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala +++ b/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala @@ -32,7 +32,7 @@ class IBusCachedPlugin(config : InstructionCacheConfig, askMemoryTranslation : B )) //TODO manage priority with branch prediction - redoBranch = pipeline.service(classOf[JumpService]).createJumpInterface(pipeline.decode) + redoBranch = pipeline.service(classOf[JumpService]).createJumpInterface(pipeline.decode, priority = 1) //Priority 1 will win against branch predictor if(catchSomething) { val exceptionService = pipeline.service(classOf[ExceptionService]) diff --git a/src/main/scala/vexriscv/plugin/PcManagerSimplePlugin.scala b/src/main/scala/vexriscv/plugin/PcManagerSimplePlugin.scala index 0c219f0..843cd3e 100644 --- a/src/main/scala/vexriscv/plugin/PcManagerSimplePlugin.scala +++ b/src/main/scala/vexriscv/plugin/PcManagerSimplePlugin.scala @@ -9,11 +9,11 @@ import scala.collection.mutable.ArrayBuffer class PcManagerSimplePlugin(resetVector : BigInt, relaxedPcCalculation : Boolean = false) extends Plugin[VexRiscv] with JumpService{ //FetchService interface - case class JumpInfo(interface : Flow[UInt], stage: Stage) + case class JumpInfo(interface : Flow[UInt], stage: Stage, priority : Int) val jumpInfos = ArrayBuffer[JumpInfo]() - override def createJumpInterface(stage: Stage): Flow[UInt] = { + override def createJumpInterface(stage: Stage, priority : Int = 0): Flow[UInt] = { val interface = Flow(UInt(32 bits)) - jumpInfos += JumpInfo(interface,stage) + jumpInfos += JumpInfo(interface,stage, priority) interface } var prefetchExceptionPort : Flow[ExceptionCause] = null @@ -59,7 +59,10 @@ class PcManagerSimplePlugin(resetVector : BigInt, //JumpService hardware implementation val jump = if(jumpInfos.length != 0) new Area { - val sortedByStage = jumpInfos.sortWith((a, b) => pipeline.indexOf(a.stage) > pipeline.indexOf(b.stage)) + val sortedByStage = jumpInfos.sortWith((a, b) => { + 
(pipeline.indexOf(a.stage) > pipeline.indexOf(b.stage)) || + (pipeline.indexOf(a.stage) == pipeline.indexOf(b.stage) && a.priority > b.priority) + }) val valids = sortedByStage.map(_.interface.valid) val pcs = sortedByStage.map(_.interface.payload) diff --git a/src/test/cpp/regression/main.cpp b/src/test/cpp/regression/main.cpp index 27dda00..054ec41 100644 --- a/src/test/cpp/regression/main.cpp +++ b/src/test/cpp/regression/main.cpp @@ -199,6 +199,8 @@ public: Workspace(string name){ + //setIStall(false); + //setDStall(false); staticMutex.lock(); testsCounter++; staticMutex.unlock(); From d0e963559af832dfdc7e202acfc6d9cf70e8422b Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Sun, 18 Feb 2018 23:48:11 +0100 Subject: [PATCH 3/4] Update readme with the new ICache implementation --- README.md | 32 ++++---- src/main/scala/vexriscv/TestsWorkspace.scala | 12 ++- src/main/scala/vexriscv/demo/Briey.scala | 3 +- .../scala/vexriscv/demo/DhrystoneBench.scala | 6 ++ src/main/scala/vexriscv/demo/GenFull.scala | 3 +- .../scala/vexriscv/demo/GenFullNoMmu.scala | 3 +- .../vexriscv/demo/GenFullNoMmuMaxPerf.scala | 3 +- .../demo/GenSmallAndPerformantICache.scala | 73 +++++++++++++++++++ .../scala/vexriscv/demo/SynthesisBench.scala | 11 ++- .../vexriscv/demo/VexRiscvAvalonForSim.scala | 3 +- .../VexRiscvAvalonWithIntegratedJtag.scala | 3 +- .../demo/VexRiscvAxi4WithIntegratedJtag.scala | 3 +- .../scala/vexriscv/ip/InstructionCache.scala | 8 +- .../scala/vexriscv/plugin/CsrPlugin.scala | 47 +++++++++++- .../vexriscv/plugin/IBusCachedPlugin.scala | 20 +++-- src/test/cpp/regression/main.cpp | 13 +++- src/test/cpp/regression/makefile | 8 +- src/test/scala/vexriscv/Play.scala | 3 +- 18 files changed, 194 insertions(+), 60 deletions(-) create mode 100644 src/main/scala/vexriscv/demo/GenSmallAndPerformantICache.scala diff --git a/README.md b/README.md index 1313130..17a1140 100644 --- a/README.md +++ b/README.md @@ -92,10 +92,14 @@ VexRiscv smallest (RV32I, 0.52 DMIPS/Mhz, no datapath 
bypass) -> Cyclone II -> 149 Mhz 780 LUT 578 FF VexRiscv small and productive (RV32I, 0.82 DMIPS/Mhz) -> - Artix 7 -> 309 Mhz 703 LUT 557 FF - Cyclone V -> 152 Mhz 502 ALMs - Cyclone IV -> 147 Mhz 1,062 LUT 552 FF - Cyclone II -> 120 Mhz 1,072 LUT 551 FF + Artix 7 -> 327 Mhz 698 LUT 558 FF + Cyclone V -> 158 Mhz 524 ALMs + Cyclone IV -> 146 Mhz 1,061 LUT 552 FF + +VexRiscv small and productive with I$ (RV32I, 0.72 DMIPS/Mhz, 4KB-I$) -> + Artix 7 -> 331 Mhz 727 LUT 600 FF + Cyclone V -> 152 Mhz 536 ALMs + Cyclone IV -> 156 Mhz 1,075 LUT 565 FF VexRiscv full no cache (RV32IM, 1.22 DMIPS/Mhz, single cycle barrel shifter, debug module, catch exceptions, static branch) -> Artix 7 -> 310 Mhz 1391 LUT 934 FF @@ -104,21 +108,19 @@ VexRiscv full no cache (RV32IM, 1.22 DMIPS/Mhz, single cycle barrel shifter, deb Cyclone II -> 108 Mhz 1,939 LUT 959 FF VexRiscv full (RV32IM, 1.21 DMIPS/Mhz with cache trashing, 4KB-I$,4KB-D$, single cycle barrel shifter, debug module, catch exceptions, static branch) -> - Artix 7 -> 250 Mhz 1911 LUT 1501 FF - Cyclone V -> 132 Mhz 1,266 ALMs - Cyclone IV -> 127 Mhz 2,733 LUT 1,762 FF - Cyclone II -> 103 Mhz 2,791 LUT 1,760 FF + Artix 7 -> 249 Mhz 1822 LUT 1362 FF + Cyclone V -> 128 Mhz 1,187 ALMs + Cyclone IV -> 107 Mhz 2,560 LUT 1,671 FF VexRiscv full max perf -> (RV32IM, 1.44 DMIPS/Mhz, 16KB-I$,16KB-D$, single cycle barrel shifter, debug module, catch exceptions, dynamic branch prediction in the fetch stage, branch and shift operations done in the Execute stage) -> - Artix 7 -> 198 Mhz 1920 LUT 1528 FF - Cyclone V -> 90 Mhz 1,261 ALMs - Cyclone IV -> 88 Mhz 2,780 LUT 1,788 FF + Artix 7 -> 192 Mhz 1858 LUT 1392 FF + Cyclone V -> 89 Mhz 1,246 ALMs + Cyclone IV -> 85 Mhz 2,673 LUT 1,679 FF VexRiscv full with MMU (RV32IM, 1.26 DMIPS/Mhz with cache trashing, 4KB-I$, 4KB-D$, single cycle barrel shifter, debug module, catch exceptions, dynamic branch, MMU) -> - Artix 7 -> 223 Mhz 2085 LUT 2020 FF - Cyclone V -> 110 Mhz 1,503 ALMs - Cyclone IV -> 108 
Mhz 3,153 LUT 2,281 FF - Cyclone II -> 94 Mhz 3,187 LUT 2,281 FF + Artix 7 -> 208 Mhz 2092 LUT 1881 FF + Cyclone V -> 112 Mhz 1,435 ALMs + Cyclone IV -> 94 Mhz 2,980 LUT 2,169 FF ``` There is a summary of the configuration which produce 1.44 DMIPS : diff --git a/src/main/scala/vexriscv/TestsWorkspace.scala b/src/main/scala/vexriscv/TestsWorkspace.scala index fb2d2d7..9908027 100644 --- a/src/main/scala/vexriscv/TestsWorkspace.scala +++ b/src/main/scala/vexriscv/TestsWorkspace.scala @@ -41,10 +41,9 @@ object TestsWorkspace { // ), new IBusCachedPlugin( config = InstructionCacheConfig( - cacheSize = 1024, + cacheSize = 2048, bytePerLine = 32, - wayCount = 2, - wrappedMemAccess = true, + wayCount = 1, addressWidth = 32, cpuDataWidth = 32, memDataWidth = 32, @@ -52,8 +51,7 @@ object TestsWorkspace { catchAccessFault = true, catchMemoryTranslationMiss = true, asyncTagMemory = false, - twoStageLogic = false, - twoCycleRam = true + twoCycleRam = false ), askMemoryTranslation = true, memoryTranslatorPortConfig = MemoryTranslatorPortConfig( @@ -118,12 +116,12 @@ object TestsWorkspace { // new HazardSimplePlugin(false, false, false, false), new MulPlugin, new DivPlugin, - new CsrPlugin(CsrPluginConfig.all(0x80000020l)), + new CsrPlugin(CsrPluginConfig.all(0x80000020l).copy(deterministicInteruptionEntry = false)), new DebugPlugin(ClockDomain.current.clone(reset = Bool().setName("debugReset"))), new BranchPlugin( earlyBranch = true, catchAddressMisaligned = true, - prediction = NONE, + prediction = DYNAMIC_TARGET, historyRamSizeLog2 = 8 ), new YamlPlugin("cpu0.yaml") diff --git a/src/main/scala/vexriscv/demo/Briey.scala b/src/main/scala/vexriscv/demo/Briey.scala index 83cb654..bb0422b 100644 --- a/src/main/scala/vexriscv/demo/Briey.scala +++ b/src/main/scala/vexriscv/demo/Briey.scala @@ -57,7 +57,6 @@ object BrieyConfig{ cacheSize = 4096, bytePerLine =32, wayCount = 1, - wrappedMemAccess = true, addressWidth = 32, cpuDataWidth = 32, memDataWidth = 32, @@ -65,7 +64,7 @@ object
BrieyConfig{ catchAccessFault = true, catchMemoryTranslationMiss = true, asyncTagMemory = false, - twoStageLogic = true + twoCycleRam = true ) // askMemoryTranslation = true, // memoryTranslatorPortConfig = MemoryTranslatorPortConfig( diff --git a/src/main/scala/vexriscv/demo/DhrystoneBench.scala b/src/main/scala/vexriscv/demo/DhrystoneBench.scala index 21c120b..c75a346 100644 --- a/src/main/scala/vexriscv/demo/DhrystoneBench.scala +++ b/src/main/scala/vexriscv/demo/DhrystoneBench.scala @@ -46,6 +46,12 @@ object DhrystoneBench extends App{ test = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no" ) + getDmips( + name = "GenSmallAndProductiveWithICache", + gen = GenSmallAndProductiveICache.main(null), + test = "make clean run REDO=10 IBUS=CACHED DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no" + ) + getDmips( name = "GenFullNoMmuNoCache", diff --git a/src/main/scala/vexriscv/demo/GenFull.scala b/src/main/scala/vexriscv/demo/GenFull.scala index ede0f80..9f82fb9 100644 --- a/src/main/scala/vexriscv/demo/GenFull.scala +++ b/src/main/scala/vexriscv/demo/GenFull.scala @@ -21,7 +21,6 @@ object GenFull extends App{ cacheSize = 4096, bytePerLine =32, wayCount = 1, - wrappedMemAccess = true, addressWidth = 32, cpuDataWidth = 32, memDataWidth = 32, @@ -29,7 +28,7 @@ object GenFull extends App{ catchAccessFault = true, catchMemoryTranslationMiss = true, asyncTagMemory = false, - twoStageLogic = true + twoCycleRam = true ), askMemoryTranslation = true, memoryTranslatorPortConfig = MemoryTranslatorPortConfig( diff --git a/src/main/scala/vexriscv/demo/GenFullNoMmu.scala b/src/main/scala/vexriscv/demo/GenFullNoMmu.scala index c40c1b9..30636e9 100644 --- a/src/main/scala/vexriscv/demo/GenFullNoMmu.scala +++ b/src/main/scala/vexriscv/demo/GenFullNoMmu.scala @@ -21,7 +21,6 @@ object GenFullNoMmu extends App{ cacheSize = 4096, bytePerLine =32, wayCount = 1, - wrappedMemAccess = true, addressWidth = 32, cpuDataWidth = 32, memDataWidth = 
32, @@ -29,7 +28,7 @@ object GenFullNoMmu extends App{ catchAccessFault = true, catchMemoryTranslationMiss = true, asyncTagMemory = false, - twoStageLogic = true + twoCycleRam = true ) ), new DBusCachedPlugin( diff --git a/src/main/scala/vexriscv/demo/GenFullNoMmuMaxPerf.scala b/src/main/scala/vexriscv/demo/GenFullNoMmuMaxPerf.scala index 40c7f51..007223c 100644 --- a/src/main/scala/vexriscv/demo/GenFullNoMmuMaxPerf.scala +++ b/src/main/scala/vexriscv/demo/GenFullNoMmuMaxPerf.scala @@ -21,7 +21,6 @@ object GenFullNoMmuMaxPerf extends App{ cacheSize = 4096*4, bytePerLine =32, wayCount = 1, - wrappedMemAccess = true, addressWidth = 32, cpuDataWidth = 32, memDataWidth = 32, @@ -29,7 +28,7 @@ object GenFullNoMmuMaxPerf extends App{ catchAccessFault = true, catchMemoryTranslationMiss = false, asyncTagMemory = false, - twoStageLogic = true + twoCycleRam = true ) ), new DBusCachedPlugin( diff --git a/src/main/scala/vexriscv/demo/GenSmallAndPerformantICache.scala b/src/main/scala/vexriscv/demo/GenSmallAndPerformantICache.scala new file mode 100644 index 0000000..0510480 --- /dev/null +++ b/src/main/scala/vexriscv/demo/GenSmallAndPerformantICache.scala @@ -0,0 +1,73 @@ +package vexriscv.demo + +import vexriscv.plugin._ +import vexriscv.{VexRiscv, VexRiscvConfig, plugin} +import spinal.core._ +import vexriscv.ip.InstructionCacheConfig + +/** + * Created by spinalvm on 15.06.17. 
+ */ +object GenSmallAndProductiveICache extends App{ + def cpu() = new VexRiscv( + config = VexRiscvConfig( + plugins = List( + new PcManagerSimplePlugin( + resetVector = 0x00000000l, + relaxedPcCalculation = false + ), + new IBusCachedPlugin( + config = InstructionCacheConfig( + cacheSize = 4096, + bytePerLine = 32, + wayCount = 1, + addressWidth = 32, + cpuDataWidth = 32, + memDataWidth = 32, + catchIllegalAccess = false, + catchAccessFault = false, + catchMemoryTranslationMiss = false, + asyncTagMemory = false, + twoCycleRam = false + ), + askMemoryTranslation = false + ), + new DBusSimplePlugin( + catchAddressMisaligned = false, + catchAccessFault = false + ), + new CsrPlugin(CsrPluginConfig.smallest), + new DecoderSimplePlugin( + catchIllegalInstruction = false + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + zeroBoot = false + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false, + executeInsertion = true + ), + new LightShifterPlugin, + new HazardSimplePlugin( + bypassExecute = true, + bypassMemory = true, + bypassWriteBack = true, + bypassWriteBackBuffer = true, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + new BranchPlugin( + earlyBranch = false, + catchAddressMisaligned = false, + prediction = NONE + ), + new YamlPlugin("cpu0.yaml") + ) + ) + ) + + SpinalVerilog(cpu()) +} diff --git a/src/main/scala/vexriscv/demo/SynthesisBench.scala b/src/main/scala/vexriscv/demo/SynthesisBench.scala index 9175061..4c8eb61 100644 --- a/src/main/scala/vexriscv/demo/SynthesisBench.scala +++ b/src/main/scala/vexriscv/demo/SynthesisBench.scala @@ -49,6 +49,12 @@ object VexRiscvSynthesisBench { SpinalVerilog(wrap(GenSmallAndProductive.cpu()).setDefinitionName(getRtlPath().split("\\.").head)) } + val smallAndProductiveWithICache = new Rtl { + override def getName(): String = "VexRiscv small and productive with instruction cache" + override def getRtlPath(): String = 
"VexRiscvSmallAndProductiveICache.v" + SpinalVerilog(wrap(GenSmallAndProductiveICache.cpu()).setDefinitionName(getRtlPath().split("\\.").head)) + } + val fullNoMmuNoCache = new Rtl { override def getName(): String = "VexRiscv full no MMU no cache" override def getRtlPath(): String = "VexRiscvFullNoMmuNoCache.v" @@ -78,8 +84,9 @@ object VexRiscvSynthesisBench { SpinalVerilog(wrap(GenFull.cpu()).setDefinitionName(getRtlPath().split("\\.").head)) } - val rtls = List(smallestNoCsr, smallest, smallAndProductive, fullNoMmuNoCache, noCacheNoMmuMaxPerf, fullNoMmuMaxPerf, fullNoMmu, full) - // val rtls = List(noCacheNoMmuMaxPerf, fullNoMmuMaxPerf) +// val rtls = List(smallestNoCsr, smallest, smallAndProductive, smallAndProductiveWithICache, fullNoMmuNoCache, noCacheNoMmuMaxPerf, fullNoMmuMaxPerf, fullNoMmu, full) +// val rtls = List(noCacheNoMmuMaxPerf, fullNoMmuMaxPerf) + val rtls = List(smallAndProductive, smallAndProductiveWithICache, fullNoMmuMaxPerf, fullNoMmu, full) val targets = XilinxStdTargets( vivadoArtix7Path = "/eda/Xilinx/Vivado/2017.2/bin" diff --git a/src/main/scala/vexriscv/demo/VexRiscvAvalonForSim.scala b/src/main/scala/vexriscv/demo/VexRiscvAvalonForSim.scala index ed49d4d..6bea5f8 100644 --- a/src/main/scala/vexriscv/demo/VexRiscvAvalonForSim.scala +++ b/src/main/scala/vexriscv/demo/VexRiscvAvalonForSim.scala @@ -39,7 +39,6 @@ object VexRiscvAvalonForSim{ cacheSize = 4096, bytePerLine =32, wayCount = 1, - wrappedMemAccess = true, addressWidth = 32, cpuDataWidth = 32, memDataWidth = 32, @@ -47,7 +46,7 @@ object VexRiscvAvalonForSim{ catchAccessFault = true, catchMemoryTranslationMiss = true, asyncTagMemory = false, - twoStageLogic = true + twoCycleRam = true ) // askMemoryTranslation = true, // memoryTranslatorPortConfig = MemoryTranslatorPortConfig( diff --git a/src/main/scala/vexriscv/demo/VexRiscvAvalonWithIntegratedJtag.scala b/src/main/scala/vexriscv/demo/VexRiscvAvalonWithIntegratedJtag.scala index a23b2df..5bc58c1 100644 --- 
a/src/main/scala/vexriscv/demo/VexRiscvAvalonWithIntegratedJtag.scala +++ b/src/main/scala/vexriscv/demo/VexRiscvAvalonWithIntegratedJtag.scala @@ -38,7 +38,6 @@ object VexRiscvAvalonWithIntegratedJtag{ cacheSize = 4096, bytePerLine =32, wayCount = 1, - wrappedMemAccess = true, addressWidth = 32, cpuDataWidth = 32, memDataWidth = 32, @@ -46,7 +45,7 @@ object VexRiscvAvalonWithIntegratedJtag{ catchAccessFault = true, catchMemoryTranslationMiss = true, asyncTagMemory = false, - twoStageLogic = true + twoCycleRam = true ) // askMemoryTranslation = true, // memoryTranslatorPortConfig = MemoryTranslatorPortConfig( diff --git a/src/main/scala/vexriscv/demo/VexRiscvAxi4WithIntegratedJtag.scala b/src/main/scala/vexriscv/demo/VexRiscvAxi4WithIntegratedJtag.scala index 5ccbb1f..c12c131 100644 --- a/src/main/scala/vexriscv/demo/VexRiscvAxi4WithIntegratedJtag.scala +++ b/src/main/scala/vexriscv/demo/VexRiscvAxi4WithIntegratedJtag.scala @@ -39,7 +39,6 @@ object VexRiscvAxi4WithIntegratedJtag{ cacheSize = 4096, bytePerLine =32, wayCount = 1, - wrappedMemAccess = true, addressWidth = 32, cpuDataWidth = 32, memDataWidth = 32, @@ -47,7 +46,7 @@ object VexRiscvAxi4WithIntegratedJtag{ catchAccessFault = true, catchMemoryTranslationMiss = true, asyncTagMemory = false, - twoStageLogic = true + twoCycleRam = true ) // askMemoryTranslation = true, // memoryTranslatorPortConfig = MemoryTranslatorPortConfig( diff --git a/src/main/scala/vexriscv/ip/InstructionCache.scala b/src/main/scala/vexriscv/ip/InstructionCache.scala index 87ffec2..ab09dae 100644 --- a/src/main/scala/vexriscv/ip/InstructionCache.scala +++ b/src/main/scala/vexriscv/ip/InstructionCache.scala @@ -10,7 +10,6 @@ import spinal.lib.bus.avalon.{AvalonMMConfig, AvalonMM} case class InstructionCacheConfig( cacheSize : Int, bytePerLine : Int, wayCount : Int, - wrappedMemAccess : Boolean, addressWidth : Int, cpuDataWidth : Int, memDataWidth : Int, @@ -18,7 +17,6 @@ case class InstructionCacheConfig( cacheSize : Int, 
catchAccessFault : Boolean, catchMemoryTranslationMiss : Boolean, asyncTagMemory : Boolean, - twoStageLogic : Boolean, twoCycleRam : Boolean = false, preResetFlush : Boolean = false){ @@ -40,7 +38,6 @@ case class InstructionCacheConfig( cacheSize : Int, addressWidth = addressWidth, dataWidth = memDataWidth, burstCountWidth = log2Up(burstSize + 1)).getReadOnlyConfig.copy( - linewrapBursts = wrappedMemAccess, useResponse = true, constantBurstBehavior = true ) @@ -131,10 +128,7 @@ case class InstructionCacheMemBus(p : InstructionCacheConfig) extends Bundle wit mm.readCmd.addr := cmd.address mm.readCmd.prot := "110" mm.readCmd.cache := "1111" - if(p.wrappedMemAccess) - mm.readCmd.setBurstWRAP() - else - mm.readCmd.setBurstINCR() + mm.readCmd.setBurstINCR() cmd.ready := mm.readCmd.ready rsp.valid := mm.readRsp.valid rsp.data := mm.readRsp.data diff --git a/src/main/scala/vexriscv/plugin/CsrPlugin.scala b/src/main/scala/vexriscv/plugin/CsrPlugin.scala index 5efb3b4..89046b8 100644 --- a/src/main/scala/vexriscv/plugin/CsrPlugin.scala +++ b/src/main/scala/vexriscv/plugin/CsrPlugin.scala @@ -49,7 +49,9 @@ case class CsrPluginConfig( minstretAccess : CsrAccess, ucycleAccess : CsrAccess, wfiGen : Boolean, - ecallGen : Boolean + ecallGen : Boolean, + deterministicInteruptionEntry : Boolean = false //Only used for simulation purposes + ){ assert(!ucycleAccess.canWrite) } @@ -431,10 +433,51 @@ class CsrPlugin(config : CsrPluginConfig) extends Plugin[VexRiscv] with Exceptio - val interrupt = ((mip.MSIP && mie.MSIE) || (mip.MEIP && mie.MEIE) || (mip.MTIP && mie.MTIE)) && mstatus.MIE && allowInterrupts + val interruptRequest = ((mip.MSIP && mie.MSIE) || (mip.MEIP && mie.MEIE) || (mip.MTIP && mie.MTIE)) && mstatus.MIE + val interrupt = interruptRequest && allowInterrupts val exception = if(exceptionPortCtrl != null) exceptionPortCtrl.exceptionValids.last && allowException else False val writeBackWasWfi = if(wfiGen) RegNext(writeBack.arbitration.isFiring &&
writeBack.input(ENV_CTRL) === EnvCtrlEnum.WFI) init(False) else False + + + val deteriministicLogic = if(deterministicInteruptionEntry) new Area{ + val counter = Reg(UInt(4 bits)) init(0) + + when(!interruptRequest || !mstatus.MIE){ + counter := 0 + } otherwise { + when(counter < 6){ + when(writeBack.arbitration.isFiring){ + counter := counter + 1 + } + } + val counterPlusPending = counter + CountOne(stages.tail.map(_.arbitration.isValid)) + when(counterPlusPending < 6){ + inhibateInterrupts() + } + } + } +// val deteriministicLogic = if(deterministicInteruptionEntry) new Area{ +// val counter = Reg(UInt(4 bits)) init(0) +// val limit = Reg(UInt(4 bits)) init(5) +// when(interruptRequest.rise()){ +// limit := CountOne(stages.tail.map(_.arbitration.isValid)).resized +// } +// when(!interruptRequest || !mstatus.MIE){ +// counter := 0 +// } otherwise { +// when(counter < limit){ +// when(writeBack.arbitration.isFiring){ +// counter := counter + 1 +// } +// } +// val counterPlusPending = counter + CountOne(stages.tail.map(_.arbitration.isValid)) + 1 +// when(counterPlusPending < limit){ +// inhibateInterrupts() +// } +// } +// } + //Interrupt/Exception entry logic pipelineLiberator.enable setWhen(interrupt) when(exception || (interrupt && pipelineLiberator.done)){ diff --git a/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala b/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala index 7c8455a..1c5999e 100644 --- a/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala +++ b/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala @@ -31,7 +31,7 @@ class IBusCachedPlugin(config : InstructionCacheConfig, askMemoryTranslation : B FLUSH_ALL -> True )) - //TODO manage priority with branch prediction + redoBranch = pipeline.service(classOf[JumpService]).createJumpInterface(pipeline.decode, priority = 1) //Priority 1 will win against branch predictor if(catchSomething) { @@ -52,6 +52,9 @@ class IBusCachedPlugin(config : InstructionCacheConfig, askMemoryTranslation : B val c 
= new CacheReport() e.kind = "cached" e.flushInstructions.add(0x400F) //invalid instruction cache + e.flushInstructions.add(0x13) + e.flushInstructions.add(0x13) + e.flushInstructions.add(0x13) e.info = c c.size = cacheSize @@ -65,26 +68,26 @@ class IBusCachedPlugin(config : InstructionCacheConfig, askMemoryTranslation : B override def build(pipeline: VexRiscv): Unit = { import pipeline._ import pipeline.config._ - +// val debugAddressOffset = 28 val cache = new InstructionCache(this.config) iBus = master(new InstructionCacheMemBus(this.config)).setName("iBus") iBus <> cache.io.mem - + iBus.cmd.address.allowOverride := cache.io.mem.cmd.address // - debugAddressOffset //Connect prefetch cache side cache.io.cpu.prefetch.isValid := prefetch.arbitration.isValid - cache.io.cpu.prefetch.pc := prefetch.output(PC) + cache.io.cpu.prefetch.pc := prefetch.output(PC)// + debugAddressOffset prefetch.arbitration.haltItself setWhen(cache.io.cpu.prefetch.haltIt) //Connect fetch cache side cache.io.cpu.fetch.isValid := fetch.arbitration.isValid cache.io.cpu.fetch.isStuck := fetch.arbitration.isStuck - cache.io.cpu.fetch.pc := fetch.output(PC) + cache.io.cpu.fetch.pc := fetch.output(PC) // + debugAddressOffset if (mmuBus != null) { cache.io.cpu.fetch.mmuBus <> mmuBus } else { - cache.io.cpu.fetch.mmuBus.rsp.physicalAddress := cache.io.cpu.fetch.mmuBus.cmd.virtualAddress + cache.io.cpu.fetch.mmuBus.rsp.physicalAddress := cache.io.cpu.fetch.mmuBus.cmd.virtualAddress //- debugAddressOffset cache.io.cpu.fetch.mmuBus.rsp.allowExecute := True cache.io.cpu.fetch.mmuBus.rsp.allowRead := True cache.io.cpu.fetch.mmuBus.rsp.allowWrite := True @@ -116,6 +119,11 @@ class IBusCachedPlugin(config : InstructionCacheConfig, askMemoryTranslation : B decode.arbitration.flushAll := True } +// val redo = RegInit(False) clearWhen(decode.arbitration.isValid) setWhen(redoBranch.valid) +// when(redoBranch.valid || redo){ +// service(classOf[InterruptionInhibitor]).inhibateInterrupts() +// } + 
if(catchSomething){ val accessFault = if(catchAccessFault) cache.io.cpu.decode.error else False val mmuMiss = if(catchMemoryTranslationMiss) cache.io.cpu.decode.mmuMiss else False diff --git a/src/test/cpp/regression/main.cpp b/src/test/cpp/regression/main.cpp index 054ec41..af0fcda 100644 --- a/src/test/cpp/regression/main.cpp +++ b/src/test/cpp/regression/main.cpp @@ -199,8 +199,8 @@ public: Workspace(string name){ - //setIStall(false); - //setDStall(false); + // setIStall(false); + // setDStall(false); staticMutex.lock(); testsCounter++; staticMutex.unlock(); @@ -406,7 +406,11 @@ public: #ifndef REF_TIME - mTime = i/2; + #ifndef MTIME_INSTR_FACTOR + mTime = i/2; + #else + mTime += top->VexRiscv->writeBack_arbitration_isFiring*MTIME_INSTR_FACTOR; + #endif #endif #ifdef CSR top->timerInterrupt = mTime >= mTimeCmp ? 1 : 0; @@ -1612,10 +1616,11 @@ string riscvTestDiv[] = { }; string freeRtosTests[] = { - "AltBlckQ", "AltBlock", "AltQTest", "AltPollQ", "blocktim", "countsem", "dead", "EventGroupsDemo", "flop", "integer", "QPeek", + "AltBlock", "AltQTest", "AltPollQ", "blocktim", "countsem", "dead", "EventGroupsDemo", "flop", "integer", "QPeek", "QueueSet", "recmutex", "semtest", "TaskNotify", "BlockQ", "crhook", "dynamic", "GenQTest", "PollQ", "QueueOverwrite", "QueueSetPolling", "sp_flop", "test1" //"flop", "sp_flop" // <- Simple test + // "AltBlckQ" ??? 
}; diff --git a/src/test/cpp/regression/makefile b/src/test/cpp/regression/makefile index 0d1be28..e6d27c1 100644 --- a/src/test/cpp/regression/makefile +++ b/src/test/cpp/regression/makefile @@ -19,7 +19,9 @@ REDO?=10 REF=no TRACE_WITH_TIME=no REF_TIME=no -THREAD_COUNT=4 +THREAD_COUNT?=4 +MTIME_INSTR_FACTOR?=no + ADDCFLAGS += -CFLAGS -DIBUS_${IBUS} ADDCFLAGS += -CFLAGS -DDBUS_${DBUS} @@ -32,6 +34,10 @@ ifeq ($(DHRYSTONE),yes) ADDCFLAGS += -CFLAGS -DDHRYSTONE endif +ifneq ($(MTIME_INSTR_FACTOR),no) + ADDCFLAGS += -CFLAGS -DMTIME_INSTR_FACTOR=${MTIME_INSTR_FACTOR} +endif + ifeq ($(TRACE),yes) VERILATOR_ARGS += --trace ADDCFLAGS += -CFLAGS -DTRACE diff --git a/src/test/scala/vexriscv/Play.scala b/src/test/scala/vexriscv/Play.scala index 48abd5f..6bc62e1 100644 --- a/src/test/scala/vexriscv/Play.scala +++ b/src/test/scala/vexriscv/Play.scala @@ -14,7 +14,6 @@ object PlayGen extends App{ cacheSize = 16, bytePerLine = 4, wayCount = 1, - wrappedMemAccess = false, addressWidth = 32, cpuDataWidth = 32, memDataWidth = 32, @@ -22,7 +21,7 @@ object PlayGen extends App{ catchAccessFault = false, catchMemoryTranslationMiss = false, asyncTagMemory = false, - twoStageLogic = false, + twoCycleRam = false, preResetFlush = false ), askMemoryTranslation = false From 8ac4d72623991f65845b1262c5b68afc39044d2c Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Sun, 18 Feb 2018 23:48:20 +0100 Subject: [PATCH 4/4] Update readme --- README.md | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 17a1140..b8a2f36 100644 --- a/README.md +++ b/README.md @@ -295,9 +295,9 @@ You can find some FPGA project which instantiate the Briey SoC there (DE1-SoC, D There is some measurements of Briey SoC timings and area : ``` - Artix 7 -> 231 Mhz 3339 LUT 3533 FF - Cyclone V -> 124 Mhz 2,264 ALMs - Cyclone IV -> 124 Mhz 4,709 LUT 3,716 FF + Artix 7 -> 239 Mhz 3227 LUT 3410 FF + Cyclone V -> 125 Mhz 2,207 ALMs + Cyclone IV -> 112 Mhz 4,594 LUT 
3,620 FF ``` ## Murax SoC @@ -697,7 +697,23 @@ This plugin fit in the fetch stage #### IBusCachedPlugin -Single way cache implementation, documentation WIP +Simple and light multi-way instruction cache. + +| Parameters | type | description | +| ------ | ----------- | ------ | +| cacheSize | Int | Total storage capacity of the cache | +| bytePerLine | Int | Number of bytes per cache line | +| wayCount | Int | Number of cache ways | +| twoCycleRam | Boolean | Check the tag values in the decode stage instead of the fetch stage to relax timings | +| asyncTagMemory | Boolean | Read the cache tags in an asynchronous manner instead of a synchronous one | +| addressWidth | Int | Address width, should be 32 | +| cpuDataWidth | Int | CPU data width, should be 32 | +| memDataWidth | Int | Memory data width, could potentially be something other than 32, but only 32 is currently tested | +| catchIllegalAccess | Boolean | Catch when a memory access is done on a non-valid memory address (MMU) | +| catchAccessFault | Boolean | Catch when the memory bus is responding with an error | +| catchMemoryTranslationMiss | Boolean | Catch when the MMU reports a TLB miss | + +Note : If you enable twoCycleRam and the wayCount is bigger than one, then the register file plugin should be configured to read the regFile in an asynchronous manner. #### DecoderSimplePlugin