Merge remote-tracking branch 'origin/dev' into dev
This commit is contained in:
commit
b4c75d4898
24
README.md
24
README.md
|
@ -66,51 +66,51 @@ The CPU configurations used below can be found in the `src/scala/vexriscv/demo`
|
||||||
|
|
||||||
```
|
```
|
||||||
VexRiscv smallest (RV32I, 0.52 DMIPS/Mhz, no datapath bypass, no interrupt) ->
|
VexRiscv smallest (RV32I, 0.52 DMIPS/Mhz, no datapath bypass, no interrupt) ->
|
||||||
Artix 7 -> 324 Mhz 496 LUT 505 FF
|
Artix 7 -> 233 Mhz 494 LUT 505 FF
|
||||||
Cyclone V -> 193 Mhz 347 ALMs
|
Cyclone V -> 193 Mhz 347 ALMs
|
||||||
Cyclone IV -> 179 Mhz 730 LUT 494 FF
|
Cyclone IV -> 179 Mhz 730 LUT 494 FF
|
||||||
iCE40 -> 92 Mhz 1130 LC
|
iCE40 -> 92 Mhz 1130 LC
|
||||||
|
|
||||||
VexRiscv smallest (RV32I, 0.52 DMIPS/Mhz, no datapath bypass) ->
|
VexRiscv smallest (RV32I, 0.52 DMIPS/Mhz, no datapath bypass) ->
|
||||||
Artix 7 -> 328 Mhz 539 LUT 562 FF
|
Artix 7 -> 232 Mhz 538 LUT 562 FF
|
||||||
Cyclone V -> 189 Mhz 387 ALMs
|
Cyclone V -> 189 Mhz 387 ALMs
|
||||||
Cyclone IV -> 175 Mhz 829 LUT 550 FF
|
Cyclone IV -> 175 Mhz 829 LUT 550 FF
|
||||||
iCE40 -> 85 Mhz 1292 LC
|
iCE40 -> 85 Mhz 1292 LC
|
||||||
|
|
||||||
VexRiscv small and productive (RV32I, 0.82 DMIPS/Mhz) ->
|
VexRiscv small and productive (RV32I, 0.82 DMIPS/Mhz) ->
|
||||||
Artix 7 -> 324 Mhz 701 LUT 531 FF
|
Artix 7 -> 226 Mhz 689 LUT 531 FF
|
||||||
Cyclone V -> 145 Mhz 499 ALMs
|
Cyclone V -> 145 Mhz 499 ALMs
|
||||||
Cyclone IV -> 150 Mhz 1,111 LUT 525 FF
|
Cyclone IV -> 150 Mhz 1,111 LUT 525 FF
|
||||||
iCE40 -> 63 Mhz 1596 LC
|
iCE40 -> 63 Mhz 1596 LC
|
||||||
|
|
||||||
VexRiscv small and productive with I$ (RV32I, 0.70 DMIPS/Mhz, 4KB-I$) ->
|
VexRiscv small and productive with I$ (RV32I, 0.70 DMIPS/Mhz, 4KB-I$) ->
|
||||||
Artix 7 -> 336 Mhz 764 LUT 562 FF
|
Artix 7 -> 230 Mhz 734 LUT 564 FF
|
||||||
Cyclone V -> 145 Mhz 511 ALMs
|
Cyclone V -> 145 Mhz 511 ALMs
|
||||||
Cyclone IV -> 144 Mhz 1,145 LUT 531 FF
|
Cyclone IV -> 144 Mhz 1,145 LUT 531 FF
|
||||||
iCE40 -> 66 Mhz 1680 LC
|
iCE40 -> 66 Mhz 1680 LC
|
||||||
|
|
||||||
VexRiscv full no cache (RV32IM, 1.21 DMIPS/Mhz 2.30 Coremark/Mhz, single cycle barrel shifter, debug module, catch exceptions, static branch) ->
|
VexRiscv full no cache (RV32IM, 1.21 DMIPS/Mhz 2.30 Coremark/Mhz, single cycle barrel shifter, debug module, catch exceptions, static branch) ->
|
||||||
Artix 7 -> 326 Mhz 1544 LUT 977 FF
|
Artix 7 -> 219 Mhz 1537 LUT 977 FF
|
||||||
Cyclone V -> 139 Mhz 958 ALMs
|
Cyclone V -> 139 Mhz 958 ALMs
|
||||||
Cyclone IV -> 135 Mhz 2,011 LUT 968 FF
|
Cyclone IV -> 135 Mhz 2,011 LUT 968 FF
|
||||||
|
|
||||||
VexRiscv full (RV32IM, 1.21 DMIPS/Mhz 2.30 Coremark/Mhz with cache thrashing, 4KB-I$,4KB-D$, single cycle barrel shifter, debug module, catch exceptions, static branch) ->
|
VexRiscv full (RV32IM, 1.21 DMIPS/Mhz 2.30 Coremark/Mhz with cache thrashing, 4KB-I$,4KB-D$, single cycle barrel shifter, debug module, catch exceptions, static branch) ->
|
||||||
Artix 7 -> 279 Mhz 1686 LUT 1172 FF
|
Artix 7 -> 193 Mhz 1706 LUT 1172 FF
|
||||||
Cyclone V -> 144 Mhz 1,128 ALMs
|
Cyclone V -> 144 Mhz 1,128 ALMs
|
||||||
Cyclone IV -> 133 Mhz 2,298 LUT 1,096 FF
|
Cyclone IV -> 133 Mhz 2,298 LUT 1,096 FF
|
||||||
|
|
||||||
VexRiscv full max dmips/mhz -> (RV32IM, 1.44 DMIPS/Mhz 2.70 Coremark/Mhz, 16KB-I$,16KB-D$, single cycle barrel shifter, debug module, catch exceptions, dynamic branch prediction in the fetch stage, branch and shift operations done in the Execute stage) ->
|
VexRiscv full max dmips/mhz -> (RV32IM, 1.44 DMIPS/Mhz 2.70 Coremark/Mhz, 16KB-I$,16KB-D$, single cycle barrel shifter, debug module, catch exceptions, dynamic branch prediction in the fetch stage, branch and shift operations done in the Execute stage) ->
|
||||||
Artix 7 -> 193 Mhz 1758 LUT 1094 FF
|
Artix 7 -> 140 Mhz 1767 LUT 1128 FF
|
||||||
Cyclone V -> 90 Mhz 1,089 ALMs
|
Cyclone V -> 90 Mhz 1,089 ALMs
|
||||||
Cyclone IV -> 79 Mhz 2,336 LUT 1,048 FF
|
Cyclone IV -> 79 Mhz 2,336 LUT 1,048 FF
|
||||||
|
|
||||||
VexRiscv full with MMU (RV32IM, 1.24 DMIPS/Mhz 2.35 Coremark/Mhz, with cache thrashing, 4KB-I$, 4KB-D$, single cycle barrel shifter, debug module, catch exceptions, dynamic branch, MMU) ->
|
VexRiscv full with MMU (RV32IM, 1.24 DMIPS/Mhz 2.35 Coremark/Mhz, with cache thrashing, 4KB-I$, 4KB-D$, single cycle barrel shifter, debug module, catch exceptions, dynamic branch, MMU) ->
|
||||||
Artix 7 -> 239 Mhz 2029 LUT 1585 FF
|
Artix 7 -> 161 Mhz 1985 LUT 1585 FF
|
||||||
Cyclone V -> 124 Mhz 1,319 ALMs
|
Cyclone V -> 124 Mhz 1,319 ALMs
|
||||||
Cyclone IV -> 122 Mhz 2,710 LUT 1,501 FF
|
Cyclone IV -> 122 Mhz 2,710 LUT 1,501 FF
|
||||||
|
|
||||||
VexRiscv linux balanced (RV32IMA, 1.21 DMIPS/Mhz 2.27 Coremark/Mhz, with cache thrashing, 4KB-I$, 4KB-D$, single cycle barrel shifter, catch exceptions, static branch, MMU, Supervisor, Compatible with mainstream linux) ->
|
VexRiscv linux balanced (RV32IMA, 1.21 DMIPS/Mhz 2.27 Coremark/Mhz, with cache thrashing, 4KB-I$, 4KB-D$, single cycle barrel shifter, catch exceptions, static branch, MMU, Supervisor, Compatible with mainstream linux) ->
|
||||||
Artix 7 -> 249 Mhz 2549 LUT 2014 FF
|
Artix 7 -> 170 Mhz 2530 LUT 2013 FF
|
||||||
Cyclone V -> 125 Mhz 1,618 ALMs
|
Cyclone V -> 125 Mhz 1,618 ALMs
|
||||||
Cyclone IV -> 116 Mhz 3,314 LUT 2,016 FF
|
Cyclone IV -> 116 Mhz 3,314 LUT 2,016 FF
|
||||||
|
|
||||||
|
@ -296,7 +296,7 @@ You can find some FPGA projects which instantiate the Briey SoC here (DE1-SoC, D
|
||||||
Here are some measurements of Briey SoC timings and area :
|
Here are some measurements of Briey SoC timings and area :
|
||||||
|
|
||||||
```
|
```
|
||||||
Artix 7 -> 275 Mhz 3072 LUT 3291 FF
|
Artix 7 -> 186 Mhz 3138 LUT 3328 FF
|
||||||
Cyclone V -> 139 Mhz 2,175 ALMs
|
Cyclone V -> 139 Mhz 2,175 ALMs
|
||||||
Cyclone IV -> 129 Mhz 4,337 LUT 3,170 FF
|
Cyclone IV -> 129 Mhz 4,337 LUT 3,170 FF
|
||||||
```
|
```
|
||||||
|
@ -351,13 +351,13 @@ Here are some timing and area measurements of the Murax SoC:
|
||||||
|
|
||||||
```
|
```
|
||||||
Murax interlocked stages (0.45 DMIPS/Mhz, 8 bits GPIO) ->
|
Murax interlocked stages (0.45 DMIPS/Mhz, 8 bits GPIO) ->
|
||||||
Artix 7 -> 313 Mhz 1039 LUT 1200 FF
|
Artix 7 -> 215 Mhz 1044 LUT 1202 FF
|
||||||
Cyclone V -> 173 Mhz 737 ALMs
|
Cyclone V -> 173 Mhz 737 ALMs
|
||||||
Cyclone IV -> 144 Mhz 1,484 LUT 1,206 FF
|
Cyclone IV -> 144 Mhz 1,484 LUT 1,206 FF
|
||||||
iCE40 -> 64 Mhz 2422 LC (nextpnr)
|
iCE40 -> 64 Mhz 2422 LC (nextpnr)
|
||||||
|
|
||||||
MuraxFast bypassed stages (0.65 DMIPS/Mhz, 8 bits GPIO) ->
|
MuraxFast bypassed stages (0.65 DMIPS/Mhz, 8 bits GPIO) ->
|
||||||
Artix 7 -> 323 Mhz 1241 LUT 1301 FF
|
Artix 7 -> 229 Mhz 1269 LUT 1302 FF
|
||||||
Cyclone V -> 159 Mhz 864 ALMs
|
Cyclone V -> 159 Mhz 864 ALMs
|
||||||
Cyclone IV -> 137 Mhz 1,688 LUT 1,241 FF
|
Cyclone IV -> 137 Mhz 1,688 LUT 1,241 FF
|
||||||
iCE40 -> 66 Mhz 2799 LC (nextpnr)
|
iCE40 -> 66 Mhz 2799 LC (nextpnr)
|
||||||
|
|
|
@ -22,7 +22,8 @@ case class InstructionCacheConfig( cacheSize : Int,
|
||||||
asyncTagMemory : Boolean,
|
asyncTagMemory : Boolean,
|
||||||
twoCycleCache : Boolean = true,
|
twoCycleCache : Boolean = true,
|
||||||
twoCycleRam : Boolean = false,
|
twoCycleRam : Boolean = false,
|
||||||
preResetFlush : Boolean = false){
|
preResetFlush : Boolean = false,
|
||||||
|
bypassGen : Boolean = false ){
|
||||||
|
|
||||||
assert(!(twoCycleRam && !twoCycleCache))
|
assert(!(twoCycleRam && !twoCycleCache))
|
||||||
|
|
||||||
|
@ -108,8 +109,8 @@ case class InstructionCacheCpuFetch(p : InstructionCacheConfig) extends Bundle w
|
||||||
val isRemoved = Bool()
|
val isRemoved = Bool()
|
||||||
val pc = UInt(p.addressWidth bits)
|
val pc = UInt(p.addressWidth bits)
|
||||||
val data = Bits(p.cpuDataWidth bits)
|
val data = Bits(p.cpuDataWidth bits)
|
||||||
val dataBypassValid = Bool()
|
val dataBypassValid = p.bypassGen generate Bool()
|
||||||
val dataBypass = Bits(p.cpuDataWidth bits)
|
val dataBypass = p.bypassGen generate Bits(p.cpuDataWidth bits)
|
||||||
val mmuBus = MemoryTranslatorBus()
|
val mmuBus = MemoryTranslatorBus()
|
||||||
val physicalAddress = UInt(p.addressWidth bits)
|
val physicalAddress = UInt(p.addressWidth bits)
|
||||||
val cacheMiss, error, mmuRefilling, mmuException, isUser = ifGen(!p.twoCycleCache)(Bool)
|
val cacheMiss, error, mmuRefilling, mmuException, isUser = ifGen(!p.twoCycleCache)(Bool)
|
||||||
|
@ -415,15 +416,16 @@ class InstructionCache(p : InstructionCacheConfig) extends Component{
|
||||||
val id = OHToUInt(hits)
|
val id = OHToUInt(hits)
|
||||||
val error = read.waysValues.map(_.tag.error).read(id)
|
val error = read.waysValues.map(_.tag.error).read(id)
|
||||||
val data = read.waysValues.map(_.data).read(id)
|
val data = read.waysValues.map(_.data).read(id)
|
||||||
val word = data.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(memWordToCpuWordRange))
|
val word = if(cpuDataWidth == memDataWidth) CombInit(data) else data.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(memWordToCpuWordRange))
|
||||||
io.cpu.fetch.data := (io.cpu.fetch.dataBypassValid ? io.cpu.fetch.dataBypass | word)
|
io.cpu.fetch.data := (if(p.bypassGen) (io.cpu.fetch.dataBypassValid ? io.cpu.fetch.dataBypass | word) else word)
|
||||||
if(twoCycleCache){
|
if(twoCycleCache){
|
||||||
io.cpu.decode.data := RegNextWhen(io.cpu.fetch.data,!io.cpu.decode.isStuck)
|
io.cpu.decode.data := RegNextWhen(io.cpu.fetch.data,!io.cpu.decode.isStuck)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(twoCycleRam && wayCount == 1){
|
if(twoCycleRam && wayCount == 1){
|
||||||
io.cpu.fetch.data := (io.cpu.fetch.dataBypassValid ? io.cpu.fetch.dataBypass | read.waysValues.head.data.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(memWordToCpuWordRange)))
|
val cacheData = if(cpuDataWidth == memDataWidth) CombInit(read.waysValues.head.data) else read.waysValues.head.data.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(memWordToCpuWordRange))
|
||||||
|
io.cpu.fetch.data := (if(p.bypassGen) (io.cpu.fetch.dataBypassValid ? io.cpu.fetch.dataBypass | cacheData) else cacheData)
|
||||||
}
|
}
|
||||||
|
|
||||||
io.cpu.fetch.mmuBus.cmd.isValid := io.cpu.fetch.isValid
|
io.cpu.fetch.mmuBus.cmd.isValid := io.cpu.fetch.isValid
|
||||||
|
@ -458,8 +460,8 @@ class InstructionCache(p : InstructionCacheConfig) extends Component{
|
||||||
val id = OHToUInt(hits)
|
val id = OHToUInt(hits)
|
||||||
val error = tags(id).error
|
val error = tags(id).error
|
||||||
val data = fetchStage.read.waysValues.map(way => stage(way.data)).read(id)
|
val data = fetchStage.read.waysValues.map(way => stage(way.data)).read(id)
|
||||||
val word = data.subdivideIn(cpuDataWidth bits).read(io.cpu.decode.pc(memWordToCpuWordRange))
|
val word = if(cpuDataWidth == memDataWidth) data else data.subdivideIn(cpuDataWidth bits).read(io.cpu.decode.pc(memWordToCpuWordRange))
|
||||||
when(stage(io.cpu.fetch.dataBypassValid)){
|
if(p.bypassGen) when(stage(io.cpu.fetch.dataBypassValid)){
|
||||||
word := stage(io.cpu.fetch.dataBypass)
|
word := stage(io.cpu.fetch.dataBypass)
|
||||||
}
|
}
|
||||||
io.cpu.decode.data := word
|
io.cpu.decode.data := word
|
||||||
|
|
|
@ -945,14 +945,9 @@ class CsrPlugin(val config: CsrPluginConfig) extends Plugin[VexRiscv] with Excep
|
||||||
val readData = B(0, 32 bits)
|
val readData = B(0, 32 bits)
|
||||||
val writeInstruction = arbitration.isValid && input(IS_CSR) && input(CSR_WRITE_OPCODE)
|
val writeInstruction = arbitration.isValid && input(IS_CSR) && input(CSR_WRITE_OPCODE)
|
||||||
val readInstruction = arbitration.isValid && input(IS_CSR) && input(CSR_READ_OPCODE)
|
val readInstruction = arbitration.isValid && input(IS_CSR) && input(CSR_READ_OPCODE)
|
||||||
val writeEnable = writeInstruction && ! blockedBySideEffects // && readDataRegValid
|
val writeEnable = writeInstruction && ! blockedBySideEffects && !arbitration.isStuckByOthers// && readDataRegValid
|
||||||
val readEnable = readInstruction && ! blockedBySideEffects // && !readDataRegValid
|
val readEnable = readInstruction && ! blockedBySideEffects && !arbitration.isStuckByOthers// && !readDataRegValid
|
||||||
//arbitration.isStuckByOthers, in case of the hazardPlugin is in the executeStage
|
//arbitration.isStuckByOthers, in case of the hazardPlugin is in the executeStage
|
||||||
val hazardStage = service(classOf[RegFileService]).readStage()
|
|
||||||
if(hazardStage == execute) when (arbitration.isStuckByOthers){
|
|
||||||
writeEnable := False
|
|
||||||
readEnable := False
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// def readDataReg = memory.input(REGFILE_WRITE_DATA) //PIPE OPT
|
// def readDataReg = memory.input(REGFILE_WRITE_DATA) //PIPE OPT
|
||||||
|
|
|
@ -61,7 +61,7 @@ class IBusCachedPlugin(resetVector : BigInt = 0x80000000l,
|
||||||
var redoBranch : Flow[UInt] = null
|
var redoBranch : Flow[UInt] = null
|
||||||
var decodeExceptionPort : Flow[ExceptionCause] = null
|
var decodeExceptionPort : Flow[ExceptionCause] = null
|
||||||
val tightlyCoupledPorts = ArrayBuffer[TightlyCoupledPort]()
|
val tightlyCoupledPorts = ArrayBuffer[TightlyCoupledPort]()
|
||||||
|
def tightlyGen = tightlyCoupledPorts.nonEmpty
|
||||||
|
|
||||||
def newTightlyCoupledPort(p : TightlyCoupledPortParameter) = {
|
def newTightlyCoupledPort(p : TightlyCoupledPortParameter) = {
|
||||||
val port = TightlyCoupledPort(p, null)
|
val port = TightlyCoupledPort(p, null)
|
||||||
|
@ -125,7 +125,7 @@ class IBusCachedPlugin(resetVector : BigInt = 0x80000000l,
|
||||||
import pipeline.config._
|
import pipeline.config._
|
||||||
|
|
||||||
pipeline plug new FetchArea(pipeline) {
|
pipeline plug new FetchArea(pipeline) {
|
||||||
val cache = new InstructionCache(IBusCachedPlugin.this.config)
|
val cache = new InstructionCache(IBusCachedPlugin.this.config.copy(bypassGen = tightlyGen))
|
||||||
iBus = master(new InstructionCacheMemBus(IBusCachedPlugin.this.config)).setName("iBus")
|
iBus = master(new InstructionCacheMemBus(IBusCachedPlugin.this.config)).setName("iBus")
|
||||||
iBus <> cache.io.mem
|
iBus <> cache.io.mem
|
||||||
iBus.cmd.address.allowOverride := cache.io.mem.cmd.address
|
iBus.cmd.address.allowOverride := cache.io.mem.cmd.address
|
||||||
|
@ -165,8 +165,8 @@ class IBusCachedPlugin(resetVector : BigInt = 0x80000000l,
|
||||||
val tightlyCoupledHits = RegNextWhen(s0.tightlyCoupledHits, stages(1).input.ready)
|
val tightlyCoupledHits = RegNextWhen(s0.tightlyCoupledHits, stages(1).input.ready)
|
||||||
val tightlyCoupledHit = RegNextWhen(s0.tightlyCoupledHit, stages(1).input.ready)
|
val tightlyCoupledHit = RegNextWhen(s0.tightlyCoupledHit, stages(1).input.ready)
|
||||||
|
|
||||||
cache.io.cpu.fetch.dataBypassValid := tightlyCoupledHit
|
if(tightlyGen) cache.io.cpu.fetch.dataBypassValid := tightlyCoupledHit
|
||||||
cache.io.cpu.fetch.dataBypass := (if(tightlyCoupledPorts.isEmpty) B(0) else MuxOH(tightlyCoupledHits, tightlyCoupledPorts.map(e => CombInit(e.bus.data))))
|
if(tightlyGen) cache.io.cpu.fetch.dataBypass := MuxOH(tightlyCoupledHits, tightlyCoupledPorts.map(e => CombInit(e.bus.data)))
|
||||||
|
|
||||||
//Connect fetch cache side
|
//Connect fetch cache side
|
||||||
cache.io.cpu.fetch.isValid := stages(1).input.valid && !tightlyCoupledHit
|
cache.io.cpu.fetch.isValid := stages(1).input.valid && !tightlyCoupledHit
|
||||||
|
|
Loading…
Reference in New Issue