IBusCachedPlugin: add a two-stage cache option for better FMax and better scaling

This commit is contained in:
Charles Papon 2017-04-08 17:42:13 +02:00
parent 5c594d6d2a
commit e3b9e671ec
5 changed files with 202 additions and 68 deletions

View File

@ -131,7 +131,7 @@ class DecoderSimplePlugin(catchIllegalInstruction : Boolean) extends Plugin[VexR
if(catchIllegalInstruction){
decodeExceptionPort.valid := arbitration.isValid && !input(LEGAL_INSTRUCTION)
decodeExceptionPort.valid := arbitration.isValid && arbitration.haltIt && !input(LEGAL_INSTRUCTION) //haltIt term: meant to let the decode stage wait for valid data from the two-stage cache. NOTE(review): waiting for data suggests !arbitration.haltIt was intended - confirm
decodeExceptionPort.code := 2
decodeExceptionPort.badAddr.assignDontCare()
}

View File

@ -13,7 +13,8 @@ case class InstructionCacheConfig( cacheSize : Int,
cpuDataWidth : Int,
memDataWidth : Int,
catchAccessFault : Boolean,
asyncTagMemory : Boolean){
asyncTagMemory : Boolean,
twoStageLogic : Boolean){
def burstSize = bytePerLine*8/memDataWidth
}
@ -52,15 +53,24 @@ class IBusCachedPlugin(config : InstructionCacheConfig) extends Plugin[VexRiscv]
//Connect fetch cache side
cache.io.cpu.fetch.isValid := fetch.arbitration.isValid
cache.io.cpu.fetch.isStuck := fetch.arbitration.isStuck
if(!twoStageLogic) cache.io.cpu.fetch.isStuckByOthers := fetch.arbitration.isStuckByOthers
cache.io.cpu.fetch.address := fetch.output(PC)
fetch.arbitration.haltIt setWhen(cache.io.cpu.fetch.haltIt)
fetch.insert(INSTRUCTION) := cache.io.cpu.fetch.data
if(!twoStageLogic) fetch.arbitration.haltIt setWhen(cache.io.cpu.fetch.haltIt)
if(!twoStageLogic) fetch.insert(INSTRUCTION) := cache.io.cpu.fetch.data
cache.io.flush.cmd.valid := False
if(twoStageLogic){
cache.io.cpu.decode.isValid := decode.arbitration.isValid
decode.arbitration.haltIt.setWhen(cache.io.cpu.decode.haltIt)
cache.io.cpu.decode.isStuck := decode.arbitration.isStuck
cache.io.cpu.decode.address := decode.input(PC)
decode.insert(INSTRUCTION) := cache.io.cpu.decode.data
}
if(catchAccessFault){
fetch.insert(IBUS_ACCESS_ERROR) := cache.io.cpu.fetch.error
if(!twoStageLogic) fetch.insert(IBUS_ACCESS_ERROR) := cache.io.cpu.fetch.error
decodeExceptionPort.valid := decode.arbitration.isValid && decode.input(IBUS_ACCESS_ERROR)
decodeExceptionPort.code := 1
@ -71,7 +81,7 @@ class IBusCachedPlugin(config : InstructionCacheConfig) extends Plugin[VexRiscv]
case class InstructionCacheCpuCmd(p : InstructionCacheConfig) extends Bundle with IMasterSlave{
case class InstructionCacheCpuPrefetch(p : InstructionCacheConfig) extends Bundle with IMasterSlave{
val isValid = Bool
val isFiring = Bool
val haltIt = Bool
@ -83,7 +93,24 @@ case class InstructionCacheCpuCmd(p : InstructionCacheConfig) extends Bundle wit
}
}
case class InstructionCacheCpuRsp(p : InstructionCacheConfig) extends Bundle with IMasterSlave {
// Fetch-stage port between the CPU and the instruction cache, with directions given from the master side.
// Several fields are only instantiated when p.twoStageLogic is false; otherwise they are null
// (presumably the null-tolerant helpers used in asMaster skip them - confirm against SpinalHDL).
case class InstructionCacheCpuFetch(p : InstructionCacheConfig) extends Bundle with IMasterSlave {
val isValid = Bool
// Halt signal; only exists when twoStageLogic is disabled.
val haltIt = if(!p.twoStageLogic) Bool else null
val isStuck = Bool
// Only exists when twoStageLogic is disabled.
val isStuckByOthers = if(!p.twoStageLogic) Bool else null
val address = UInt(p.addressWidth bit)
// 32-bit data word; only exists when twoStageLogic is disabled.
val data = if(!p.twoStageLogic) Bits(32 bit) else null
// Error flag; requires both single-stage mode and p.catchAccessFault.
val error = if(!p.twoStageLogic && p.catchAccessFault) Bool else null
override def asMaster(): Unit = {
// Always-present signals declared as outputs of the master.
out(isValid, isStuck, address)
// Optional output; may be null depending on the configuration.
outWithNull(isStuckByOthers)
// Optional inputs of the master; each may be null depending on the configuration.
inWithNull(error,data,haltIt)
}
}
case class InstructionCacheCpuDecode(p : InstructionCacheConfig) extends Bundle with IMasterSlave {
require(p.twoStageLogic)
val isValid = Bool
val haltIt = Bool
val isStuck = Bool
@ -98,14 +125,15 @@ case class InstructionCacheCpuRsp(p : InstructionCacheConfig) extends Bundle wit
}
}
case class InstructionCacheCpuBus(p : InstructionCacheConfig) extends Bundle with IMasterSlave{
val prefetch = InstructionCacheCpuCmd(p)
val fetch = InstructionCacheCpuRsp(p)
val prefetch = InstructionCacheCpuPrefetch(p)
val fetch = InstructionCacheCpuFetch(p)
val decode = if(p.twoStageLogic) InstructionCacheCpuDecode(p) else null
override def asMaster(): Unit = {
master(prefetch)
master(fetch)
if(p.twoStageLogic) master(decode)
}
}
@ -181,10 +209,6 @@ class InstructionCache(p : InstructionCacheConfig) extends Component{
val address = UInt(tagRange.length bit)
}
// class LineWord extends Bundle{
// val data = Bits(wordWidth bits)
// val error = Bool
// }
val ways = Array.fill(wayCount)(new Area{
val tags = Mem(new LineInfo(),wayLineCount)
@ -247,10 +271,14 @@ class InstructionCache(p : InstructionCacheConfig) extends Component{
val loadedWords = RegNext(loadedWordsNext)
val loadedWordsReadable = RegNext(loadedWords)
loadedWordsNext := loadedWords
val waysWritePort = ways(0).datas.writePort //Not multi ways
waysWritePort.valid := io.mem.rsp.valid
waysWritePort.address := request.addr(lineRange) @@ wordIndex
waysWritePort.data := io.mem.rsp.data
when(io.mem.rsp.valid){
wordIndex := wordIndex + 1
loadedWordsNext(wordIndex) := True
ways(0).datas(request.addr(lineRange) @@ wordIndex) := io.mem.rsp.data //TODO
if(catchAccessFault) loadingWithError setWhen io.mem.rsp.error
}
@ -278,7 +306,7 @@ class InstructionCache(p : InstructionCacheConfig) extends Component{
}
}
val task = new Area{
val task = if(!twoStageLogic) new Area{
val waysHitValid = False
val waysHitError = Bool.assignDontCare()
val waysHitWord = Bits(wordWidth bit)
@ -310,13 +338,93 @@ class InstructionCache(p : InstructionCacheConfig) extends Component{
io.cpu.fetch.haltIt := io.cpu.fetch.isValid && !(waysHitValid || (loaderHitValid && loaderHitReady))
io.cpu.fetch.data := waysHitWord //TODO
if(catchAccessFault) io.cpu.fetch.error := (waysHitValid && waysHitError) || (loaderHitValid && loaderHitReady && lineLoader.loadingWithErrorReg)
lineLoader.requestIn.valid := io.cpu.fetch.isValid && ! waysHitValid
lineLoader.requestIn.valid := io.cpu.fetch.isValid && !io.cpu.fetch.isStuckByOthers && !waysHitValid
lineLoader.requestIn.addr := io.cpu.fetch.address
} else new Area{
val waysHitValid = False
val waysHitError = Bool.assignDontCare()
val waysHitWord = Bits(wordWidth bit)
val waysRead = for(way <- ways) yield new Area{
val tag = if(asyncTagMemory)
way.tags.readAsync(io.cpu.fetch.address(lineRange))
else
way.tags.readSync(io.cpu.prefetch.address(lineRange),enable = !io.cpu.fetch.isStuck)
val data = way.datas.readSync(io.cpu.prefetch.address(lineRange.high downto wordRange.low),enable = !io.cpu.fetch.isStuck)
waysHitWord := data //Not applicable to multi way
when(tag.valid && tag.address === io.cpu.fetch.address(tagRange)) {
waysHitValid := True
if(catchAccessFault) waysHitError := tag.error
}
when(lineLoader.request.valid && lineLoader.request.addr(lineRange) === io.cpu.fetch.address(lineRange)){
waysHitValid := False //Not applicable to multi way
}
}
val loadedWord = new Area{
val valid = RegNext(lineLoader.waysWritePort.valid)
val address = RegNext(lineLoader.request.addr(tagLineRange) @@ lineLoader.wordIndex @@ U"00")
val data = RegNext(lineLoader.waysWritePort.data)
}
val fetchInstructionValid = Bool
val fetchInstructionValue = Bits(32 bits)
val fetchInstructionValidReg = Reg(Bool)
val fetchInstructionValueReg = Reg(Bits(32 bits))
when(fetchInstructionValidReg){
fetchInstructionValid := True
fetchInstructionValue := fetchInstructionValueReg
}.elsewhen(loadedWord.valid && (loadedWord.address >> 2) === (io.cpu.fetch.address >> 2)){
fetchInstructionValid := True
fetchInstructionValue := loadedWord.data
} otherwise{
fetchInstructionValid := waysHitValid
fetchInstructionValue := waysHitWord
}
when(io.cpu.fetch.isStuck){
fetchInstructionValidReg := fetchInstructionValid
fetchInstructionValueReg := fetchInstructionValue
} otherwise {
fetchInstructionValidReg := False
}
val decodeInstructionValid = Reg(Bool)
val decodeInstructionReg = Reg(Bits(32 bits))
when(!io.cpu.decode.isStuck){
decodeInstructionValid := fetchInstructionValid
decodeInstructionReg := fetchInstructionValue
}.elsewhen(loadedWord.valid && (loadedWord.address >> 2) === (io.cpu.decode.address >> 2)){
decodeInstructionValid := True
decodeInstructionReg := loadedWord.data
}
io.cpu.decode.haltIt := io.cpu.decode.isValid && !decodeInstructionValid
io.cpu.decode.data := decodeInstructionReg
lineLoader.requestIn.valid := io.cpu.decode.isValid && !decodeInstructionValid
lineLoader.requestIn.addr := io.cpu.decode.address
}
io.flush.cmd.ready := !(lineLoader.request.valid || io.cpu.fetch.isValid)
}
//
//object InstructionCacheMain{
//
// def main(args: Array[String]) {

View File

@ -200,23 +200,24 @@ object TopLevel {
configTest.plugins ++= List(
new PcManagerSimplePlugin(0x00000000l, true),
new IBusSimplePlugin(
interfaceKeepData = true,
catchAccessFault = false
),
// new IBusCachedPlugin(
// config = InstructionCacheConfig(
// cacheSize = 4096,
// bytePerLine =32,
// wayCount = 1,
// wrappedMemAccess = true,
// addressWidth = 32,
// cpuDataWidth = 32,
// memDataWidth = 32,
// catchAccessFault = false,
// asyncTagMemory = false
// )
// new IBusSimplePlugin(
// interfaceKeepData = true,
// catchAccessFault = false
// ),
new IBusCachedPlugin(
config = InstructionCacheConfig(
cacheSize = 4096,
bytePerLine =32,
wayCount = 1,
wrappedMemAccess = true,
addressWidth = 32,
cpuDataWidth = 32,
memDataWidth = 32,
catchAccessFault = false,
asyncTagMemory = false,
twoStageLogic = true
)
),
new DBusSimplePlugin(
catchAddressMisaligned = false,
@ -238,22 +239,22 @@ object TopLevel {
catchIllegalInstruction = false
),
new RegFilePlugin(
regFileReadyKind = Plugin.SYNC,
regFileReadyKind = Plugin.ASYNC,
zeroBoot = false
),
new IntAluPlugin,
new SrcPlugin(
separatedAddSub = true
separatedAddSub = false
),
new FullBarrielShifterPlugin,
// new LightShifterPlugin,
// new HazardSimplePlugin(true, true, true, true),
// new HazardSimplePlugin(false, true, false, true),
new HazardSimplePlugin(
bypassExecute = true,
bypassMemory = true,
bypassWriteBack = true,
bypassWriteBackBuffer = true,
bypassExecute = false,
bypassMemory = false,
bypassWriteBack = false,
bypassWriteBackBuffer = false,
pessimisticUseSrc = false,
pessimisticWriteRegFile = false,
pessimisticAddressMatch = false
@ -268,12 +269,13 @@ object TopLevel {
)
)
val toplevel = new VexRiscv(configFull)
// val toplevel = new VexRiscv(configFull)
// val toplevel = new VexRiscv(configLight)
// val toplevel = new VexRiscv(configTest)
val toplevel = new VexRiscv(configTest)
toplevel.decode.input(toplevel.config.INSTRUCTION).addAttribute(Verilator.public)
toplevel.decode.input(toplevel.config.PC).addAttribute(Verilator.public)
toplevel.decode.arbitration.isValid.addAttribute(Verilator.public)
toplevel.decode.arbitration.haltIt.addAttribute(Verilator.public)
// toplevel.writeBack.input(config.PC).addAttribute(Verilator.public)
// toplevel.service(classOf[DecoderSimplePlugin]).bench(toplevel)
@ -285,4 +287,5 @@ object TopLevel {
//TODO DivPlugin should not use MixedDivider (double twoComplement)
//TODO DivPlugin should register the twoComplement output before pipeline insertion
//TODO MulPlugin doesn't fit well on Artix (FMAX)
//TODO PcReg design is unoptimized by Artix synthesis
//TODO PcReg design is unoptimized by Artix synthesis
//TODO FMAX: SRC mux + bypass mux priority

View File

@ -1,42 +1,65 @@
[*]
[*] GTKWave Analyzer v3.3.58 (w)1999-2014 BSI
[*] Sat Apr 1 15:43:19 2017
[*] Sat Apr 8 15:08:01 2017
[*]
[dumpfile] "/home/spinalvm/Spinal/VexRiscv/src/test/cpp/testA/dhrystoneO3.vcd"
[dumpfile_mtime] "Sat Apr 1 15:42:10 2017"
[dumpfile_size] 214475745
[dumpfile] "/home/spinalvm/Spinal/VexRiscv/src/test/cpp/testA/rv32ui-p-simple.vcd"
[dumpfile_mtime] "Sat Apr 8 15:02:54 2017"
[dumpfile_size] 95378
[savefile] "/home/spinalvm/Spinal/VexRiscv/src/test/cpp/testA/fail.gtkw"
[timestart] 0
[timestart] 211
[size] 1776 953
[pos] -1 -1
*-16.000000 553 48755 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
*-4.422177 320 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
[treeopen] TOP.
[treeopen] TOP.VexRiscv.
[sst_width] 313
[signals_width] 558
[sst_width] 201
[signals_width] 397
[sst_expanded] 1
[sst_vpaned_height] 593
[sst_vpaned_height] 279
@800200
-prefetch
@28
TOP.clk
TOP.reset
TOP.VexRiscv.instructionCache_1.io_cpu_prefetch_haltIt
@22
TOP.VexRiscv.dataCache_1.io_mem_rsp_payload_data[31:0]
TOP.VexRiscv.instructionCache_1.io_cpu_prefetch_address[31:0]
@1000200
-prefetch
@800200
-fetch
@28
TOP.VexRiscv.dataCache_1.io_mem_rsp_valid
TOP.VexRiscv.instructionCache_1.io_cpu_fetch_isValid
TOP.VexRiscv.instructionCache_1.io_cpu_fetch_isStuck
@22
TOP.VexRiscv.dataCache_1.io_cpu_writeBack_data[31:0]
TOP.VexRiscv.instructionCache_1.io_cpu_fetch_address[31:0]
@1000200
-fetch
@800200
-decode
@28
TOP.VexRiscv.writeBack_MEMORY_ENABLE
TOP.VexRiscv.writeBack_arbitration_isFiring
TOP.VexRiscv.dataCache_1.ways_0_data_port0_enable
@22
TOP.VexRiscv.dataCache_1.ways_0_data_port0_data[31:0]
@28
TOP.VexRiscv.dataCache_1.manager_cpuRspIn_ready
TOP.VexRiscv.instructionCache_1.io_cpu_decode_isValid
@29
TOP.VexRiscv.dataCache_1.manager_cpuRspIn_valid
TOP.VexRiscv.instructionCache_1.io_cpu_decode_haltIt
@28
TOP.VexRiscv.dataCache_1.manager_cpuRsp_ready
TOP.VexRiscv.dataCache_1.manager_cpuRsp_valid
TOP.VexRiscv.instructionCache_1.io_cpu_decode_isStuck
@22
TOP.VexRiscv.instructionCache_1.io_cpu_decode_address[31:0]
TOP.VexRiscv.instructionCache_1.io_cpu_decode_instruction[31:0]
@1000200
-decode
@800200
-ibus
@22
TOP.VexRiscv.instructionCache_1.io_mem_cmd_payload_address[31:0]
@28
TOP.VexRiscv.instructionCache_1.io_mem_cmd_ready
TOP.VexRiscv.instructionCache_1.io_mem_cmd_valid
@22
TOP.VexRiscv.instructionCache_1.io_mem_rsp_payload_data[31:0]
@28
TOP.VexRiscv.instructionCache_1.io_mem_rsp_valid
@1000200
-ibus
@28
TOP.VexRiscv.instructionCache_1.clk
[pattern_trace] 1
[pattern_trace] 0

View File

@ -340,7 +340,7 @@ public:
for(SimElement* simElement : simElements) simElement->preCycle();
if(top->VexRiscv->decode_arbitration_isValid){
if(top->VexRiscv->decode_arbitration_isValid && !top->VexRiscv->decode_arbitration_haltIt){
uint32_t expectedData;
bool dummy;
iBusAccess(top->VexRiscv->decode_PC, &expectedData, &dummy);