Merge remote-tracking branch 'origin/dev' into dev

This commit is contained in:
Charles Papon 2019-10-11 00:25:37 +02:00
commit b4c75d4898
4 changed files with 29 additions and 32 deletions

View File

@ -66,51 +66,51 @@ The CPU configurations used below can be found in the `src/scala/vexriscv/demo`
```
VexRiscv smallest (RV32I, 0.52 DMIPS/Mhz, no datapath bypass, no interrupt) ->
Artix 7 -> 324 Mhz 496 LUT 505 FF
Artix 7 -> 233 Mhz 494 LUT 505 FF
Cyclone V -> 193 Mhz 347 ALMs
Cyclone IV -> 179 Mhz 730 LUT 494 FF
iCE40 -> 92 Mhz 1130 LC
VexRiscv smallest (RV32I, 0.52 DMIPS/Mhz, no datapath bypass) ->
Artix 7 -> 328 Mhz 539 LUT 562 FF
Artix 7 -> 232 Mhz 538 LUT 562 FF
Cyclone V -> 189 Mhz 387 ALMs
Cyclone IV -> 175 Mhz 829 LUT 550 FF
iCE40 -> 85 Mhz 1292 LC
VexRiscv small and productive (RV32I, 0.82 DMIPS/Mhz) ->
Artix 7 -> 324 Mhz 701 LUT 531 FF
Artix 7 -> 226 Mhz 689 LUT 531 FF
Cyclone V -> 145 Mhz 499 ALMs
Cyclone IV -> 150 Mhz 1,111 LUT 525 FF
iCE40 -> 63 Mhz 1596 LC
VexRiscv small and productive with I$ (RV32I, 0.70 DMIPS/Mhz, 4KB-I$) ->
Artix 7 -> 336 Mhz 764 LUT 562 FF
Artix 7 -> 230 Mhz 734 LUT 564 FF
Cyclone V -> 145 Mhz 511 ALMs
Cyclone IV -> 144 Mhz 1,145 LUT 531 FF
iCE40 -> 66 Mhz 1680 LC
VexRiscv full no cache (RV32IM, 1.21 DMIPS/Mhz 2.30 Coremark/Mhz, single cycle barrel shifter, debug module, catch exceptions, static branch) ->
Artix 7 -> 326 Mhz 1544 LUT 977 FF
Artix 7 -> 219 Mhz 1537 LUT 977 FF
Cyclone V -> 139 Mhz 958 ALMs
Cyclone IV -> 135 Mhz 2,011 LUT 968 FF
VexRiscv full (RV32IM, 1.21 DMIPS/Mhz 2.30 Coremark/Mhz with cache thrashing, 4KB-I$,4KB-D$, single cycle barrel shifter, debug module, catch exceptions, static branch) ->
Artix 7 -> 279 Mhz 1686 LUT 1172 FF
Artix 7 -> 193 Mhz 1706 LUT 1172 FF
Cyclone V -> 144 Mhz 1,128 ALMs
Cyclone IV -> 133 Mhz 2,298 LUT 1,096 FF
VexRiscv full max dmips/mhz (RV32IM, 1.44 DMIPS/Mhz 2.70 Coremark/Mhz, 16KB-I$,16KB-D$, single cycle barrel shifter, debug module, catch exceptions, dynamic branch prediction in the fetch stage, branch and shift operations done in the Execute stage) ->
Artix 7 -> 193 Mhz 1758 LUT 1094 FF
Artix 7 -> 140 Mhz 1767 LUT 1128 FF
Cyclone V -> 90 Mhz 1,089 ALMs
Cyclone IV -> 79 Mhz 2,336 LUT 1,048 FF
VexRiscv full with MMU (RV32IM, 1.24 DMIPS/Mhz 2.35 Coremark/Mhz, with cache thrashing, 4KB-I$, 4KB-D$, single cycle barrel shifter, debug module, catch exceptions, dynamic branch, MMU) ->
Artix 7 -> 239 Mhz 2029 LUT 1585 FF
Artix 7 -> 161 Mhz 1985 LUT 1585 FF
Cyclone V -> 124 Mhz 1,319 ALMs
Cyclone IV -> 122 Mhz 2,710 LUT 1,501 FF
VexRiscv linux balanced (RV32IMA, 1.21 DMIPS/Mhz 2.27 Coremark/Mhz, with cache thrashing, 4KB-I$, 4KB-D$, single cycle barrel shifter, catch exceptions, static branch, MMU, Supervisor, Compatible with mainstream linux) ->
Artix 7 -> 249 Mhz 2549 LUT 2014 FF
Artix 7 -> 170 Mhz 2530 LUT 2013 FF
Cyclone V -> 125 Mhz 1,618 ALMs
Cyclone IV -> 116 Mhz 3,314 LUT 2,016 FF
@ -296,7 +296,7 @@ You can find some FPGA projects which instantiate the Briey SoC here (DE1-SoC, D
Here are some measurements of Briey SoC timings and area:
```
Artix 7 -> 275 Mhz 3072 LUT 3291 FF
Artix 7 -> 186 Mhz 3138 LUT 3328 FF
Cyclone V -> 139 Mhz 2,175 ALMs
Cyclone IV -> 129 Mhz 4,337 LUT 3,170 FF
```
@ -351,13 +351,13 @@ Here are some timing and area measurements of the Murax SoC:
```
Murax interlocked stages (0.45 DMIPS/Mhz, 8 bits GPIO) ->
Artix 7 -> 313 Mhz 1039 LUT 1200 FF
Artix 7 -> 215 Mhz 1044 LUT 1202 FF
Cyclone V -> 173 Mhz 737 ALMs
Cyclone IV -> 144 Mhz 1,484 LUT 1,206 FF
iCE40 -> 64 Mhz 2422 LC (nextpnr)
MuraxFast bypassed stages (0.65 DMIPS/Mhz, 8 bits GPIO) ->
Artix 7 -> 323 Mhz 1241 LUT 1301 FF
Artix 7 -> 229 Mhz 1269 LUT 1302 FF
Cyclone V -> 159 Mhz 864 ALMs
Cyclone IV -> 137 Mhz 1,688 LUT 1,241 FF
iCE40 -> 66 Mhz 2799 LC (nextpnr)

View File

@ -22,7 +22,8 @@ case class InstructionCacheConfig( cacheSize : Int,
asyncTagMemory : Boolean,
twoCycleCache : Boolean = true,
twoCycleRam : Boolean = false,
preResetFlush : Boolean = false){
preResetFlush : Boolean = false,
bypassGen : Boolean = false ){
assert(!(twoCycleRam && !twoCycleCache))
@ -108,8 +109,8 @@ case class InstructionCacheCpuFetch(p : InstructionCacheConfig) extends Bundle w
val isRemoved = Bool()
val pc = UInt(p.addressWidth bits)
val data = Bits(p.cpuDataWidth bits)
val dataBypassValid = Bool()
val dataBypass = Bits(p.cpuDataWidth bits)
val dataBypassValid = p.bypassGen generate Bool()
val dataBypass = p.bypassGen generate Bits(p.cpuDataWidth bits)
val mmuBus = MemoryTranslatorBus()
val physicalAddress = UInt(p.addressWidth bits)
val cacheMiss, error, mmuRefilling, mmuException, isUser = ifGen(!p.twoCycleCache)(Bool)
@ -415,15 +416,16 @@ class InstructionCache(p : InstructionCacheConfig) extends Component{
val id = OHToUInt(hits)
val error = read.waysValues.map(_.tag.error).read(id)
val data = read.waysValues.map(_.data).read(id)
val word = data.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(memWordToCpuWordRange))
io.cpu.fetch.data := (io.cpu.fetch.dataBypassValid ? io.cpu.fetch.dataBypass | word)
val word = if(cpuDataWidth == memDataWidth) CombInit(data) else data.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(memWordToCpuWordRange))
io.cpu.fetch.data := (if(p.bypassGen) (io.cpu.fetch.dataBypassValid ? io.cpu.fetch.dataBypass | word) else word)
if(twoCycleCache){
io.cpu.decode.data := RegNextWhen(io.cpu.fetch.data,!io.cpu.decode.isStuck)
}
}
if(twoCycleRam && wayCount == 1){
io.cpu.fetch.data := (io.cpu.fetch.dataBypassValid ? io.cpu.fetch.dataBypass | read.waysValues.head.data.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(memWordToCpuWordRange)))
val cacheData = if(cpuDataWidth == memDataWidth) CombInit(read.waysValues.head.data) else read.waysValues.head.data.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(memWordToCpuWordRange))
io.cpu.fetch.data := (if(p.bypassGen) (io.cpu.fetch.dataBypassValid ? io.cpu.fetch.dataBypass | cacheData) else cacheData)
}
io.cpu.fetch.mmuBus.cmd.isValid := io.cpu.fetch.isValid
@ -458,8 +460,8 @@ class InstructionCache(p : InstructionCacheConfig) extends Component{
val id = OHToUInt(hits)
val error = tags(id).error
val data = fetchStage.read.waysValues.map(way => stage(way.data)).read(id)
val word = data.subdivideIn(cpuDataWidth bits).read(io.cpu.decode.pc(memWordToCpuWordRange))
when(stage(io.cpu.fetch.dataBypassValid)){
val word = if(cpuDataWidth == memDataWidth) data else data.subdivideIn(cpuDataWidth bits).read(io.cpu.decode.pc(memWordToCpuWordRange))
if(p.bypassGen) when(stage(io.cpu.fetch.dataBypassValid)){
word := stage(io.cpu.fetch.dataBypass)
}
io.cpu.decode.data := word

View File

@ -945,14 +945,9 @@ class CsrPlugin(val config: CsrPluginConfig) extends Plugin[VexRiscv] with Excep
val readData = B(0, 32 bits)
val writeInstruction = arbitration.isValid && input(IS_CSR) && input(CSR_WRITE_OPCODE)
val readInstruction = arbitration.isValid && input(IS_CSR) && input(CSR_READ_OPCODE)
val writeEnable = writeInstruction && ! blockedBySideEffects // && readDataRegValid
val readEnable = readInstruction && ! blockedBySideEffects // && !readDataRegValid
val writeEnable = writeInstruction && ! blockedBySideEffects && !arbitration.isStuckByOthers// && readDataRegValid
val readEnable = readInstruction && ! blockedBySideEffects && !arbitration.isStuckByOthers// && !readDataRegValid
//arbitration.isStuckByOthers, in case of the hazardPlugin is in the executeStage
val hazardStage = service(classOf[RegFileService]).readStage()
if(hazardStage == execute) when (arbitration.isStuckByOthers){
writeEnable := False
readEnable := False
}
// def readDataReg = memory.input(REGFILE_WRITE_DATA) //PIPE OPT

View File

@ -61,7 +61,7 @@ class IBusCachedPlugin(resetVector : BigInt = 0x80000000l,
var redoBranch : Flow[UInt] = null
var decodeExceptionPort : Flow[ExceptionCause] = null
val tightlyCoupledPorts = ArrayBuffer[TightlyCoupledPort]()
def tightlyGen = tightlyCoupledPorts.nonEmpty
def newTightlyCoupledPort(p : TightlyCoupledPortParameter) = {
val port = TightlyCoupledPort(p, null)
@ -125,7 +125,7 @@ class IBusCachedPlugin(resetVector : BigInt = 0x80000000l,
import pipeline.config._
pipeline plug new FetchArea(pipeline) {
val cache = new InstructionCache(IBusCachedPlugin.this.config)
val cache = new InstructionCache(IBusCachedPlugin.this.config.copy(bypassGen = tightlyGen))
iBus = master(new InstructionCacheMemBus(IBusCachedPlugin.this.config)).setName("iBus")
iBus <> cache.io.mem
iBus.cmd.address.allowOverride := cache.io.mem.cmd.address
@ -165,8 +165,8 @@ class IBusCachedPlugin(resetVector : BigInt = 0x80000000l,
val tightlyCoupledHits = RegNextWhen(s0.tightlyCoupledHits, stages(1).input.ready)
val tightlyCoupledHit = RegNextWhen(s0.tightlyCoupledHit, stages(1).input.ready)
cache.io.cpu.fetch.dataBypassValid := tightlyCoupledHit
cache.io.cpu.fetch.dataBypass := (if(tightlyCoupledPorts.isEmpty) B(0) else MuxOH(tightlyCoupledHits, tightlyCoupledPorts.map(e => CombInit(e.bus.data))))
if(tightlyGen) cache.io.cpu.fetch.dataBypassValid := tightlyCoupledHit
if(tightlyGen) cache.io.cpu.fetch.dataBypass := MuxOH(tightlyCoupledHits, tightlyCoupledPorts.map(e => CombInit(e.bus.data)))
//Connect fetch cache side
cache.io.cpu.fetch.isValid := stages(1).input.valid && !tightlyCoupledHit