Update readme with the new ICache implementation

This commit is contained in:
Dolu1990 2018-02-18 23:48:11 +01:00
parent 93110d3b95
commit d0e963559a
18 changed files with 194 additions and 60 deletions

View file

@ -92,10 +92,14 @@ VexRiscv smallest (RV32I, 0.52 DMIPS/Mhz, no datapath bypass) ->
Cyclone II -> 149 Mhz 780 LUT 578 FF
VexRiscv small and productive (RV32I, 0.82 DMIPS/Mhz) ->
Artix 7 -> 309 Mhz 703 LUT 557 FF
Cyclone V -> 152 Mhz 502 ALMs
Cyclone IV -> 147 Mhz 1,062 LUT 552 FF
Cyclone II -> 120 Mhz 1,072 LUT 551 FF
Artix 7 -> 327 Mhz 698 LUT 558 FF
Cyclone V -> 158 Mhz 524 ALMs
Cyclone IV -> 146 Mhz 1,061 LUT 552 FF
VexRiscv small and productive with I$ (RV32I, 0.72 DMIPS/Mhz, 4KB-I$) ->
Artix 7 -> 331 Mhz 727 LUT 600 FF
Cyclone V -> 152 Mhz 536 ALMs
Cyclone IV -> 156 Mhz 1,075 LUT 565 FF
VexRiscv full no cache (RV32IM, 1.22 DMIPS/Mhz, single cycle barrel shifter, debug module, catch exceptions, static branch) ->
Artix 7 -> 310 Mhz 1391 LUT 934 FF
@ -104,21 +108,19 @@ VexRiscv full no cache (RV32IM, 1.22 DMIPS/Mhz, single cycle barrel shifter, deb
Cyclone II -> 108 Mhz 1,939 LUT 959 FF
VexRiscv full (RV32IM, 1.21 DMIPS/Mhz with cache thrashing, 4KB-I$,4KB-D$, single cycle barrel shifter, debug module, catch exceptions, static branch) ->
Artix 7 -> 250 Mhz 1911 LUT 1501 FF
Cyclone V -> 132 Mhz 1,266 ALMs
Cyclone IV -> 127 Mhz 2,733 LUT 1,762 FF
Cyclone II -> 103 Mhz 2,791 LUT 1,760 FF
Artix 7 -> 249 Mhz 1822 LUT 1362 FF
Cyclone V -> 128 Mhz 1,187 ALMs
Cyclone IV -> 107 Mhz 2,560 LUT 1,671 FF
VexRiscv full max perf -> (RV32IM, 1.44 DMIPS/Mhz, 16KB-I$,16KB-D$, single cycle barrel shifter, debug module, catch exceptions, dynamic branch prediction in the fetch stage, branch and shift operations done in the Execute stage) ->
Artix 7 -> 198 Mhz 1920 LUT 1528 FF
Cyclone V -> 90 Mhz 1,261 ALMs
Cyclone IV -> 88 Mhz 2,780 LUT 1,788 FF
Artix 7 -> 192 Mhz 1858 LUT 1392 FF
Cyclone V -> 89 Mhz 1,246 ALMs
Cyclone IV -> 85 Mhz 2,673 LUT 1,679 FF
VexRiscv full with MMU (RV32IM, 1.26 DMIPS/Mhz with cache thrashing, 4KB-I$, 4KB-D$, single cycle barrel shifter, debug module, catch exceptions, dynamic branch, MMU) ->
Artix 7 -> 223 Mhz 2085 LUT 2020 FF
Cyclone V -> 110 Mhz 1,503 ALMs
Cyclone IV -> 108 Mhz 3,153 LUT 2,281 FF
Cyclone II -> 94 Mhz 3,187 LUT 2,281 FF
Artix 7 -> 208 Mhz 2092 LUT 1881 FF
Cyclone V -> 112 Mhz 1,435 ALMs
Cyclone IV -> 94 Mhz 2,980 LUT 2,169 FF
```
Here is a summary of the configuration which produces 1.44 DMIPS:

View file

@ -41,10 +41,9 @@ object TestsWorkspace {
// ),
new IBusCachedPlugin(
config = InstructionCacheConfig(
cacheSize = 1024,
cacheSize = 2048,
bytePerLine = 32,
wayCount = 2,
wrappedMemAccess = true,
wayCount = 1,
addressWidth = 32,
cpuDataWidth = 32,
memDataWidth = 32,
@ -52,8 +51,7 @@ object TestsWorkspace {
catchAccessFault = true,
catchMemoryTranslationMiss = true,
asyncTagMemory = false,
twoStageLogic = false,
twoCycleRam = true
twoCycleRam = false
),
askMemoryTranslation = true,
memoryTranslatorPortConfig = MemoryTranslatorPortConfig(
@ -118,12 +116,12 @@ object TestsWorkspace {
// new HazardSimplePlugin(false, false, false, false),
new MulPlugin,
new DivPlugin,
new CsrPlugin(CsrPluginConfig.all(0x80000020l)),
new CsrPlugin(CsrPluginConfig.all(0x80000020l).copy(deterministicInteruptionEntry = false)),
new DebugPlugin(ClockDomain.current.clone(reset = Bool().setName("debugReset"))),
new BranchPlugin(
earlyBranch = true,
catchAddressMisaligned = true,
prediction = NONE,
prediction = DYNAMIC_TARGET,
historyRamSizeLog2 = 8
),
new YamlPlugin("cpu0.yaml")

View file

@ -57,7 +57,6 @@ object BrieyConfig{
cacheSize = 4096,
bytePerLine =32,
wayCount = 1,
wrappedMemAccess = true,
addressWidth = 32,
cpuDataWidth = 32,
memDataWidth = 32,
@ -65,7 +64,7 @@ object BrieyConfig{
catchAccessFault = true,
catchMemoryTranslationMiss = true,
asyncTagMemory = false,
twoStageLogic = true
twoCycleRam = true
)
// askMemoryTranslation = true,
// memoryTranslatorPortConfig = MemoryTranslatorPortConfig(

View file

@ -46,6 +46,12 @@ object DhrystoneBench extends App{
test = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no"
)
getDmips(
name = "GenSmallAndProductiveWithICache",
gen = GenSmallAndProductiveICache.main(null),
test = "make clean run REDO=10 IBUS=CACHED DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no"
)
getDmips(
name = "GenFullNoMmuNoCache",

View file

@ -21,7 +21,6 @@ object GenFull extends App{
cacheSize = 4096,
bytePerLine =32,
wayCount = 1,
wrappedMemAccess = true,
addressWidth = 32,
cpuDataWidth = 32,
memDataWidth = 32,
@ -29,7 +28,7 @@ object GenFull extends App{
catchAccessFault = true,
catchMemoryTranslationMiss = true,
asyncTagMemory = false,
twoStageLogic = true
twoCycleRam = true
),
askMemoryTranslation = true,
memoryTranslatorPortConfig = MemoryTranslatorPortConfig(

View file

@ -21,7 +21,6 @@ object GenFullNoMmu extends App{
cacheSize = 4096,
bytePerLine =32,
wayCount = 1,
wrappedMemAccess = true,
addressWidth = 32,
cpuDataWidth = 32,
memDataWidth = 32,
@ -29,7 +28,7 @@ object GenFullNoMmu extends App{
catchAccessFault = true,
catchMemoryTranslationMiss = true,
asyncTagMemory = false,
twoStageLogic = true
twoCycleRam = true
)
),
new DBusCachedPlugin(

View file

@ -21,7 +21,6 @@ object GenFullNoMmuMaxPerf extends App{
cacheSize = 4096*4,
bytePerLine =32,
wayCount = 1,
wrappedMemAccess = true,
addressWidth = 32,
cpuDataWidth = 32,
memDataWidth = 32,
@ -29,7 +28,7 @@ object GenFullNoMmuMaxPerf extends App{
catchAccessFault = true,
catchMemoryTranslationMiss = false,
asyncTagMemory = false,
twoStageLogic = true
twoCycleRam = true
)
),
new DBusCachedPlugin(

View file

@ -0,0 +1,73 @@
package vexriscv.demo
import vexriscv.plugin._
import vexriscv.{VexRiscv, VexRiscvConfig, plugin}
import spinal.core._
import vexriscv.ip.InstructionCacheConfig
/**
* Created by spinalvm on 15.06.17.
*/
object GenSmallAndProductiveICache extends App{
  /**
   * Assembles a VexRiscv instance from a fixed plugin list whose instruction
   * side is an IBusCachedPlugin (4096-byte cache, 32-byte lines, one way) and
   * whose data side is a DBusSimplePlugin.
   *
   * Every catch* option in this configuration is switched off and no memory
   * translation is requested for the instruction bus.
   *
   * @return a freshly constructed VexRiscv; each call builds new plugin instances
   */
  def cpu() = {
    // Program counter management, reset vector at address 0.
    val pcManager = new PcManagerSimplePlugin(
      resetVector = 0x00000000l,
      relaxedPcCalculation = false
    )

    // Instruction cache parameters, kept apart from the plugin that consumes them.
    val iCacheParameters = InstructionCacheConfig(
      cacheSize = 4096,
      bytePerLine = 32,
      wayCount = 1,
      addressWidth = 32,
      cpuDataWidth = 32,
      memDataWidth = 32,
      catchIllegalAccess = false,
      catchAccessFault = false,
      catchMemoryTranslationMiss = false,
      asyncTagMemory = false,
      twoCycleRam = false
    )
    val instructionBus = new IBusCachedPlugin(
      config = iCacheParameters,
      askMemoryTranslation = false
    )

    // Uncached data bus, no misalignment / access fault catching.
    val dataBus = new DBusSimplePlugin(
      catchAddressMisaligned = false,
      catchAccessFault = false
    )

    val csr = new CsrPlugin(CsrPluginConfig.smallest)

    val decoder = new DecoderSimplePlugin(
      catchIllegalInstruction = false
    )

    val registerFile = new RegFilePlugin(
      regFileReadyKind = plugin.SYNC,
      zeroBoot = false
    )

    val intAlu = new IntAluPlugin

    val sources = new SrcPlugin(
      separatedAddSub = false,
      executeInsertion = true
    )

    val shifter = new LightShifterPlugin

    // All bypass paths enabled, none of the pessimistic options.
    val hazards = new HazardSimplePlugin(
      bypassExecute = true,
      bypassMemory = true,
      bypassWriteBack = true,
      bypassWriteBackBuffer = true,
      pessimisticUseSrc = false,
      pessimisticWriteRegFile = false,
      pessimisticAddressMatch = false
    )

    // Late branch, no prediction.
    val branches = new BranchPlugin(
      earlyBranch = false,
      catchAddressMisaligned = false,
      prediction = NONE
    )

    val yamlReport = new YamlPlugin("cpu0.yaml")

    // The list order is the same as the construction order above.
    new VexRiscv(
      config = VexRiscvConfig(
        plugins = List(
          pcManager,
          instructionBus,
          dataBus,
          csr,
          decoder,
          registerFile,
          intAlu,
          sources,
          shifter,
          hazards,
          branches,
          yamlReport
        )
      )
    )
  }

  // Running this App emits the Verilog for the CPU built by cpu().
  SpinalVerilog(cpu())
}

View file

@ -49,6 +49,12 @@ object VexRiscvSynthesisBench {
SpinalVerilog(wrap(GenSmallAndProductive.cpu()).setDefinitionName(getRtlPath().split("\\.").head))
}
val smallAndProductiveWithICache = new Rtl {
override def getName(): String = "VexRiscv small and productive with instruction cache"
override def getRtlPath(): String = "VexRiscvSmallAndProductiveICache.v"
SpinalVerilog(wrap(GenSmallAndProductiveICache.cpu()).setDefinitionName(getRtlPath().split("\\.").head))
}
val fullNoMmuNoCache = new Rtl {
override def getName(): String = "VexRiscv full no MMU no cache"
override def getRtlPath(): String = "VexRiscvFullNoMmuNoCache.v"
@ -78,8 +84,9 @@ object VexRiscvSynthesisBench {
SpinalVerilog(wrap(GenFull.cpu()).setDefinitionName(getRtlPath().split("\\.").head))
}
val rtls = List(smallestNoCsr, smallest, smallAndProductive, fullNoMmuNoCache, noCacheNoMmuMaxPerf, fullNoMmuMaxPerf, fullNoMmu, full)
// val rtls = List(noCacheNoMmuMaxPerf, fullNoMmuMaxPerf)
// val rtls = List(smallestNoCsr, smallest, smallAndProductive, smallAndProductiveWithICache, fullNoMmuNoCache, noCacheNoMmuMaxPerf, fullNoMmuMaxPerf, fullNoMmu, full)
// val rtls = List(noCacheNoMmuMaxPerf, fullNoMmuMaxPerf)
val rtls = List(smallAndProductive, smallAndProductiveWithICache, fullNoMmuMaxPerf, fullNoMmu, full)
val targets = XilinxStdTargets(
vivadoArtix7Path = "/eda/Xilinx/Vivado/2017.2/bin"

View file

@ -39,7 +39,6 @@ object VexRiscvAvalonForSim{
cacheSize = 4096,
bytePerLine =32,
wayCount = 1,
wrappedMemAccess = true,
addressWidth = 32,
cpuDataWidth = 32,
memDataWidth = 32,
@ -47,7 +46,7 @@ object VexRiscvAvalonForSim{
catchAccessFault = true,
catchMemoryTranslationMiss = true,
asyncTagMemory = false,
twoStageLogic = true
twoCycleRam = true
)
// askMemoryTranslation = true,
// memoryTranslatorPortConfig = MemoryTranslatorPortConfig(

View file

@ -38,7 +38,6 @@ object VexRiscvAvalonWithIntegratedJtag{
cacheSize = 4096,
bytePerLine =32,
wayCount = 1,
wrappedMemAccess = true,
addressWidth = 32,
cpuDataWidth = 32,
memDataWidth = 32,
@ -46,7 +45,7 @@ object VexRiscvAvalonWithIntegratedJtag{
catchAccessFault = true,
catchMemoryTranslationMiss = true,
asyncTagMemory = false,
twoStageLogic = true
twoCycleRam = true
)
// askMemoryTranslation = true,
// memoryTranslatorPortConfig = MemoryTranslatorPortConfig(

View file

@ -39,7 +39,6 @@ object VexRiscvAxi4WithIntegratedJtag{
cacheSize = 4096,
bytePerLine =32,
wayCount = 1,
wrappedMemAccess = true,
addressWidth = 32,
cpuDataWidth = 32,
memDataWidth = 32,
@ -47,7 +46,7 @@ object VexRiscvAxi4WithIntegratedJtag{
catchAccessFault = true,
catchMemoryTranslationMiss = true,
asyncTagMemory = false,
twoStageLogic = true
twoCycleRam = true
)
// askMemoryTranslation = true,
// memoryTranslatorPortConfig = MemoryTranslatorPortConfig(

View file

@ -10,7 +10,6 @@ import spinal.lib.bus.avalon.{AvalonMMConfig, AvalonMM}
case class InstructionCacheConfig( cacheSize : Int,
bytePerLine : Int,
wayCount : Int,
wrappedMemAccess : Boolean,
addressWidth : Int,
cpuDataWidth : Int,
memDataWidth : Int,
@ -18,7 +17,6 @@ case class InstructionCacheConfig( cacheSize : Int,
catchAccessFault : Boolean,
catchMemoryTranslationMiss : Boolean,
asyncTagMemory : Boolean,
twoStageLogic : Boolean,
twoCycleRam : Boolean = false,
preResetFlush : Boolean = false){
@ -40,7 +38,6 @@ case class InstructionCacheConfig( cacheSize : Int,
addressWidth = addressWidth,
dataWidth = memDataWidth,
burstCountWidth = log2Up(burstSize + 1)).getReadOnlyConfig.copy(
linewrapBursts = wrappedMemAccess,
useResponse = true,
constantBurstBehavior = true
)
@ -131,9 +128,6 @@ case class InstructionCacheMemBus(p : InstructionCacheConfig) extends Bundle wit
mm.readCmd.addr := cmd.address
mm.readCmd.prot := "110"
mm.readCmd.cache := "1111"
if(p.wrappedMemAccess)
mm.readCmd.setBurstWRAP()
else
mm.readCmd.setBurstINCR()
cmd.ready := mm.readCmd.ready
rsp.valid := mm.readRsp.valid

View file

@ -49,7 +49,9 @@ case class CsrPluginConfig(
minstretAccess : CsrAccess,
ucycleAccess : CsrAccess,
wfiGen : Boolean,
ecallGen : Boolean
ecallGen : Boolean,
deterministicInteruptionEntry : Boolean = false //Only used for simulation purposes
){
assert(!ucycleAccess.canWrite)
}
@ -431,10 +433,51 @@ class CsrPlugin(config : CsrPluginConfig) extends Plugin[VexRiscv] with Exceptio
val interrupt = ((mip.MSIP && mie.MSIE) || (mip.MEIP && mie.MEIE) || (mip.MTIP && mie.MTIE)) && mstatus.MIE && allowInterrupts
val interruptRequest = ((mip.MSIP && mie.MSIE) || (mip.MEIP && mie.MEIE) || (mip.MTIP && mie.MTIE)) && mstatus.MIE
val interrupt = interruptRequest && allowInterrupts
val exception = if(exceptionPortCtrl != null) exceptionPortCtrl.exceptionValids.last && allowException else False
val writeBackWasWfi = if(wfiGen) RegNext(writeBack.arbitration.isFiring && writeBack.input(ENV_CTRL) === EnvCtrlEnum.WFI) init(False) else False
val deteriministicLogic = if(deterministicInteruptionEntry) new Area{
val counter = Reg(UInt(4 bits)) init(0)
when(!interruptRequest || !mstatus.MIE){
counter := 0
} otherwise {
when(counter < 6){
when(writeBack.arbitration.isFiring){
counter := counter + 1
}
}
val counterPlusPending = counter + CountOne(stages.tail.map(_.arbitration.isValid))
when(counterPlusPending < 6){
inhibateInterrupts()
}
}
}
// val deteriministicLogic = if(deterministicInteruptionEntry) new Area{
// val counter = Reg(UInt(4 bits)) init(0)
// val limit = Reg(UInt(4 bits)) init(5)
// when(interruptRequest.rise()){
// limit := CountOne(stages.tail.map(_.arbitration.isValid)).resized
// }
// when(!interruptRequest || !mstatus.MIE){
// counter := 0
// } otherwise {
// when(counter < limit){
// when(writeBack.arbitration.isFiring){
// counter := counter + 1
// }
// }
// val counterPlusPending = counter + CountOne(stages.tail.map(_.arbitration.isValid)) + 1
// when(counterPlusPending < limit){
// inhibateInterrupts()
// }
// }
// }
//Interrupt/Exception entry logic
pipelineLiberator.enable setWhen(interrupt)
when(exception || (interrupt && pipelineLiberator.done)){

View file

@ -31,7 +31,7 @@ class IBusCachedPlugin(config : InstructionCacheConfig, askMemoryTranslation : B
FLUSH_ALL -> True
))
//TODO manage priority with branch prediction
redoBranch = pipeline.service(classOf[JumpService]).createJumpInterface(pipeline.decode, priority = 1) //Priority 1 will win against branch predictor
if(catchSomething) {
@ -52,6 +52,9 @@ class IBusCachedPlugin(config : InstructionCacheConfig, askMemoryTranslation : B
val c = new CacheReport()
e.kind = "cached"
e.flushInstructions.add(0x400F) //invalid instruction cache
e.flushInstructions.add(0x13)
e.flushInstructions.add(0x13)
e.flushInstructions.add(0x13)
e.info = c
c.size = cacheSize
@ -65,26 +68,26 @@ class IBusCachedPlugin(config : InstructionCacheConfig, askMemoryTranslation : B
override def build(pipeline: VexRiscv): Unit = {
import pipeline._
import pipeline.config._
// val debugAddressOffset = 28
val cache = new InstructionCache(this.config)
iBus = master(new InstructionCacheMemBus(this.config)).setName("iBus")
iBus <> cache.io.mem
iBus.cmd.address.allowOverride := cache.io.mem.cmd.address // - debugAddressOffset
//Connect prefetch cache side
cache.io.cpu.prefetch.isValid := prefetch.arbitration.isValid
cache.io.cpu.prefetch.pc := prefetch.output(PC)
cache.io.cpu.prefetch.pc := prefetch.output(PC)// + debugAddressOffset
prefetch.arbitration.haltItself setWhen(cache.io.cpu.prefetch.haltIt)
//Connect fetch cache side
cache.io.cpu.fetch.isValid := fetch.arbitration.isValid
cache.io.cpu.fetch.isStuck := fetch.arbitration.isStuck
cache.io.cpu.fetch.pc := fetch.output(PC)
cache.io.cpu.fetch.pc := fetch.output(PC) // + debugAddressOffset
if (mmuBus != null) {
cache.io.cpu.fetch.mmuBus <> mmuBus
} else {
cache.io.cpu.fetch.mmuBus.rsp.physicalAddress := cache.io.cpu.fetch.mmuBus.cmd.virtualAddress
cache.io.cpu.fetch.mmuBus.rsp.physicalAddress := cache.io.cpu.fetch.mmuBus.cmd.virtualAddress //- debugAddressOffset
cache.io.cpu.fetch.mmuBus.rsp.allowExecute := True
cache.io.cpu.fetch.mmuBus.rsp.allowRead := True
cache.io.cpu.fetch.mmuBus.rsp.allowWrite := True
@ -116,6 +119,11 @@ class IBusCachedPlugin(config : InstructionCacheConfig, askMemoryTranslation : B
decode.arbitration.flushAll := True
}
// val redo = RegInit(False) clearWhen(decode.arbitration.isValid) setWhen(redoBranch.valid)
// when(redoBranch.valid || redo){
// service(classOf[InterruptionInhibitor]).inhibateInterrupts()
// }
if(catchSomething){
val accessFault = if(catchAccessFault) cache.io.cpu.decode.error else False
val mmuMiss = if(catchMemoryTranslationMiss) cache.io.cpu.decode.mmuMiss else False

View file

@ -199,8 +199,8 @@ public:
Workspace(string name){
//setIStall(false);
//setDStall(false);
// setIStall(false);
// setDStall(false);
staticMutex.lock();
testsCounter++;
staticMutex.unlock();
@ -406,7 +406,11 @@ public:
#ifndef REF_TIME
#ifndef MTIME_INSTR_FACTOR
mTime = i/2;
#else
mTime += top->VexRiscv->writeBack_arbitration_isFiring*MTIME_INSTR_FACTOR;
#endif
#endif
#ifdef CSR
top->timerInterrupt = mTime >= mTimeCmp ? 1 : 0;
@ -1612,10 +1616,11 @@ string riscvTestDiv[] = {
};
string freeRtosTests[] = {
"AltBlckQ", "AltBlock", "AltQTest", "AltPollQ", "blocktim", "countsem", "dead", "EventGroupsDemo", "flop", "integer", "QPeek",
"AltBlock", "AltQTest", "AltPollQ", "blocktim", "countsem", "dead", "EventGroupsDemo", "flop", "integer", "QPeek",
"QueueSet", "recmutex", "semtest", "TaskNotify", "BlockQ", "crhook", "dynamic",
"GenQTest", "PollQ", "QueueOverwrite", "QueueSetPolling", "sp_flop", "test1"
//"flop", "sp_flop" // <- Simple test
// "AltBlckQ" ???
};

View file

@ -19,7 +19,9 @@ REDO?=10
REF=no
TRACE_WITH_TIME=no
REF_TIME=no
THREAD_COUNT=4
THREAD_COUNT?=4
MTIME_INSTR_FACTOR?=no
ADDCFLAGS += -CFLAGS -DIBUS_${IBUS}
ADDCFLAGS += -CFLAGS -DDBUS_${DBUS}
@ -32,6 +34,10 @@ ifeq ($(DHRYSTONE),yes)
ADDCFLAGS += -CFLAGS -DDHRYSTONE
endif
ifneq ($(MTIME_INSTR_FACTOR),no)
ADDCFLAGS += -CFLAGS -DMTIME_INSTR_FACTOR=${MTIME_INSTR_FACTOR}
endif
ifeq ($(TRACE),yes)
VERILATOR_ARGS += --trace
ADDCFLAGS += -CFLAGS -DTRACE

View file

@ -14,7 +14,6 @@ object PlayGen extends App{
cacheSize = 16,
bytePerLine = 4,
wayCount = 1,
wrappedMemAccess = false,
addressWidth = 32,
cpuDataWidth = 32,
memDataWidth = 32,
@ -22,7 +21,7 @@ object PlayGen extends App{
catchAccessFault = false,
catchMemoryTranslationMiss = false,
asyncTagMemory = false,
twoStageLogic = false,
twoCycleRam = false,
preResetFlush = false
),
askMemoryTranslation = false