Rework fetchPc to optionally share the pcReg with stage(1)

IBusSimplePlugin now implements the cmdForkPersistence option
This commit is contained in:
Dolu1990 2018-10-03 16:24:10 +02:00
parent c61f17aea3
commit 48bff80653
19 changed files with 95 additions and 93 deletions

View file

@ -422,7 +422,8 @@ val cpu = new VexRiscv(
plugins = List(
new IBusSimplePlugin(
resetVector = 0x00000000l,
relaxedPcCalculation = true
cmdForkOnSecondStage = true,
cmdForkPersistence = true
),
new DBusSimplePlugin(
catchAddressMisaligned = false,
@ -643,19 +644,6 @@ This chapter describes plugins currently implemented.
- [DebugPlugin](#debugplugin)
- [YamlPlugin](#yamlplugin)
#### PcManagerSimplePlugin
This plugin implements the program counter and a jump service to all plugins.
| Parameters | type | description |
| ------ | ----------- | ------ |
| resetVector | BigInt | Address of the program counter after the reset |
| relaxedPcCalculation | Boolean | By default jump have an asynchronous immediate effect on the program counter, which allow to reduce the branch penalties by one cycle but could reduce the FMax as it will combinatorialy drive the instruction bus address signal. To avoid this you can set this parameter to true, which will make the jump affecting the programm counter in a sequancial way, which will cut the combinatorial path but add one additional cycle of penalty when a jump occur. |
This plugin operates on the prefetch stage.
#### IBusSimplePlugin
@ -665,8 +653,8 @@ This plugin implement the CPU frontend (instruction fetch) via a very simple and
| ------ | ----------- | ------ |
| catchAccessFault | Boolean | If the read response specifies a read error and this parameter is true, it will generate a CPU exception trap |
| resetVector | BigInt | Address of the program counter after the reset |
| relaxedPcCalculation | Boolean | By default jump have an asynchronous immediate effect on the program counter, which allow to reduce the branch penalties by one cycle but could reduce the FMax as it will combinatorialy drive the instruction bus address signal. To avoid this you can set this parameter to true, which will make the jump affecting the programm counter in a sequancial way, which will cut the combinatorial path but add one additional cycle of penalty when a jump occur. |
| relaxedBusCmdValid | Boolean | Same than relaxedPcCalculation, but for the iBus.cmd.valid pin. |
| cmdForkOnSecondStage | Boolean | By default, jumps have an asynchronous immediate effect on the program counter, which reduces the branch penalties by one cycle but could reduce the FMax, as it will combinatorially drive the instruction bus address signal. To avoid this, you can set this parameter to true, which will make jumps affect the program counter in a sequential way; this cuts the combinatorial path but adds one additional cycle of penalty when a jump occurs. |
| cmdForkPersistence | Boolean | If this parameter is false, then requests on the iBus can disappear/change before their completion, which reduces area but isn't safe/supported by many arbitrations/slaves. If you set this parameter to true, then each iBus cmd will stay on the bus until it is completed. |
| compressedGen | Boolean | Enable RVC support |
| busLatencyMin | Int | Specify the minimal latency between the iBus.cmd and iBus.rsp, which will add the corresponding number of stages into the frontend to keep the IPC to 1.|
| injectorStage | Boolean | Add a stage between the frontend and the decode stage of the CPU to improve FMax. (busLatencyMin + injectorStage) should be at least two. |
@ -700,8 +688,9 @@ case class IBusSimpleBus(interfaceKeepData : Boolean) extends Bundle with IMaste
}
```
**Important** : There should be at least one cycle of latency between the cmd and the rsp. The IBus.cmd can remove a request when a CPU jump occurs or when the CPU is halted by something in the pipeline. As many arbitrations aren't made for this behaviour, it is important to add a buffer to the iBus.cmd to avoid this. Ex : iBus.cmd.s2mPipe, which adds a zero latency buffer and cuts the iBus.cmd.ready path.
You can also do iBus.cmd.s2mPipe.m2sPipe, which will cut all combinatorial paths of the bus but then has a latency of 1 cycle, which means you should probably set the busLatencyMin to 2.
**Important** : Check out the cmdForkPersistence parameter, because if it's not set, it can break the iBus compatibility with your memory system (unless you externally add some buffers).
Setting cmdForkPersistence and cmdForkOnSecondStage improves iBus cmd timings.
Note that bridges are implemented to convert this interface into AXI4 and Avalon

View file

@ -31,40 +31,40 @@ object TestsWorkspace {
SpinalConfig(mergeAsyncProcess = false, anonymSignalPrefix = "zz_").generateVerilog {
val configFull = VexRiscvConfig(
plugins = List(
// new IBusSimplePlugin(
// resetVector = 0x80000000l,
// relaxedPcCalculation = false,
// relaxedBusCmdValid = false,
// prediction = NONE,
// historyRamSizeLog2 = 10,
// catchAccessFault = true,
// compressedGen = true,
// busLatencyMin = 1,
// injectorStage = true
// ),
new IBusCachedPlugin(
new IBusSimplePlugin(
resetVector = 0x80000000l,
compressedGen = false,
cmdForkOnSecondStage = true,
cmdForkPersistence = true,
prediction = NONE,
injectorStage = true,
config = InstructionCacheConfig(
cacheSize = 4096,
bytePerLine = 32,
wayCount = 1,
addressWidth = 32,
cpuDataWidth = 32,
memDataWidth = 32,
catchIllegalAccess = true,
catchAccessFault = true,
catchMemoryTranslationMiss = true,
asyncTagMemory = false,
twoCycleRam = false,
twoCycleCache = true
),
memoryTranslatorPortConfig = MemoryTranslatorPortConfig(
portTlbSize = 4
)
historyRamSizeLog2 = 10,
catchAccessFault = true,
compressedGen = false,
busLatencyMin = 1,
injectorStage = true
),
// new IBusCachedPlugin(
// resetVector = 0x80000000l,
// compressedGen = false,
// prediction = NONE,
// injectorStage = true,
// config = InstructionCacheConfig(
// cacheSize = 4096,
// bytePerLine = 32,
// wayCount = 1,
// addressWidth = 32,
// cpuDataWidth = 32,
// memDataWidth = 32,
// catchIllegalAccess = true,
// catchAccessFault = true,
// catchMemoryTranslationMiss = true,
// asyncTagMemory = false,
// twoCycleRam = false,
// twoCycleCache = true
// ),
// memoryTranslatorPortConfig = MemoryTranslatorPortConfig(
// portTlbSize = 4
// )
// ),
// new DBusSimplePlugin(
// catchAddressMisaligned = true,
// catchAccessFault = true,

View file

@ -15,7 +15,7 @@ object FormalSimple extends App{
new HaltOnExceptionPlugin,
new IBusSimplePlugin(
resetVector = 0x00000000l,
relaxedPcCalculation = false,
cmdForkOnSecondStage = false,
prediction = DYNAMIC_TARGET,
catchAccessFault = false,
compressedGen = true

View file

@ -18,7 +18,7 @@ object GenCustomCsr extends App{
new CustomCsrDemoGpioPlugin,
new IBusSimplePlugin(
resetVector = 0x00000000l,
relaxedPcCalculation = false,
cmdForkOnSecondStage = false,
prediction = NONE,
catchAccessFault = false,
compressedGen = false

View file

@ -14,7 +14,7 @@ object GenCustomSimdAdd extends App{
new SimdAddPlugin,
new IBusSimplePlugin(
resetVector = 0x00000000l,
relaxedPcCalculation = false,
cmdForkOnSecondStage = false,
prediction = NONE,
catchAccessFault = false,
compressedGen = false

View file

@ -13,7 +13,7 @@ object GenDeterministicVex extends App{
plugins = List(
new IBusSimplePlugin(
resetVector = 0x80000000l,
relaxedPcCalculation = false,
cmdForkOnSecondStage = false,
prediction = STATIC,
catchAccessFault = true,
compressedGen = false

View file

@ -14,7 +14,7 @@ object GenFullNoMmuNoCache extends App{
plugins = List(
new IBusSimplePlugin(
resetVector = 0x80000000l,
relaxedPcCalculation = false,
cmdForkOnSecondStage = false,
prediction = STATIC,
catchAccessFault = false,
compressedGen = false

View file

@ -14,7 +14,7 @@ object GenNoCacheNoMmuMaxPerf extends App{
plugins = List(
new IBusSimplePlugin(
resetVector = 0x80000000l,
relaxedPcCalculation = false,
cmdForkOnSecondStage = false,
prediction = DYNAMIC_TARGET,
historyRamSizeLog2 = 8,
catchAccessFault = true,

View file

@ -13,7 +13,7 @@ object GenSmallAndProductive extends App{
plugins = List(
new IBusSimplePlugin(
resetVector = 0x80000000l,
relaxedPcCalculation = false,
cmdForkOnSecondStage = false,
prediction = NONE,
catchAccessFault = false,
compressedGen = false

View file

@ -13,7 +13,7 @@ object GenSmallest extends App{
plugins = List(
new IBusSimplePlugin(
resetVector = 0x80000000l,
relaxedPcCalculation = false,
cmdForkOnSecondStage = false,
prediction = NONE,
catchAccessFault = false,
compressedGen = false

View file

@ -18,7 +18,7 @@ object GenSmallestNoCsr extends App{
new IBusSimplePlugin(
resetVector = 0x80000000l,
relaxedPcCalculation = false,
cmdForkOnSecondStage = false,
prediction = NONE,
catchAccessFault = false,
compressedGen = false

View file

@ -69,7 +69,8 @@ object MuraxConfig{
cpuPlugins = ArrayBuffer( //DebugPlugin added by the toplevel
new IBusSimplePlugin(
resetVector = if(withXip) 0xF001E000l else 0x80000000l,
relaxedPcCalculation = true,
cmdForkOnSecondStage = true,
cmdForkPersistence = withXip, //Required by the Xip controller
prediction = NONE,
catchAccessFault = false,
compressedGen = false
@ -227,7 +228,7 @@ case class Murax(config : MuraxConfig) extends Component{
val externalInterrupt = False
for(plugin <- cpu.plugins) plugin match{
case plugin : IBusSimplePlugin =>
mainBusArbiter.io.iBus.cmd <> plugin.iBus.cmd.halfPipe() //TODO !!
mainBusArbiter.io.iBus.cmd <> plugin.iBus.cmd
mainBusArbiter.io.iBus.rsp <> plugin.iBus.rsp
case plugin : DBusSimplePlugin => {
if(!pipelineDBus)
@ -496,7 +497,7 @@ object MuraxDhrystoneReadyMulDivStatic{
)
config.cpuPlugins += new IBusSimplePlugin(
resetVector = 0x80000000l,
relaxedPcCalculation = true,
cmdForkOnSecondStage = true,
prediction = STATIC,
catchAccessFault = false,
compressedGen = false

View file

@ -179,7 +179,7 @@ object MuraxSynthesisBench {
val rtls = List(murax, muraxFast)
val targets = IcestormStdTargets() ++ XilinxStdTargets(
val targets = IcestormStdTargets().take(1) ++ XilinxStdTargets(
vivadoArtix7Path = "/eda/Xilinx/Vivado/2017.2/bin"
) ++ AlteraStdTargets(
quartusCycloneIVPath = "/eda/intelFPGA_lite/17.0/quartus/bin/",

View file

@ -28,7 +28,7 @@ object VexRiscvAvalonForSim{
plugins = List(
new IBusSimplePlugin(
resetVector = 0x00000000l,
relaxedPcCalculation = false,
cmdForkOnSecondStage = false,
prediction = STATIC,
catchAccessFault = false,
compressedGen = false

View file

@ -731,6 +731,7 @@ class CsrPlugin(config : CsrPluginConfig) extends Plugin[VexRiscv] with Exceptio
}
}
decode.arbitration.haltByOther setWhen(List(execute,memory).map(s => s.arbitration.isValid && s.input(ENV_CTRL) === EnvCtrlEnum.XRET).orR)
execute plug new Area {
import execute._

View file

@ -17,6 +17,7 @@ abstract class IBusFetcherImpl(val catchAccessFault : Boolean,
val decodePcGen : Boolean,
val compressedGen : Boolean,
val cmdToRspStageCount : Int,
val pcRegReusedForSecondStage : Boolean,
val injectorReadyCutGen : Boolean,
val prediction : BranchPrediction,
val historyRamSizeLog2 : Int,
@ -221,7 +222,7 @@ abstract class IBusFetcherImpl(val catchAccessFault : Boolean,
}
for((s,sNext) <- (stages, stages.tail).zipped) {
if(s == stages.head) {
if(s == stages.head && pcRegReusedForSecondStage) {
sNext.input.arbitrationFrom(s.output.toEvent().m2sPipeWithFlush(flush, s != stages.head, collapsBubble = false))
sNext.input.payload := fetchPc.pcReg
fetchPc.propagatePc setWhen(sNext.input.fire)

View file

@ -24,6 +24,7 @@ class IBusCachedPlugin(resetVector : BigInt = 0x80000000l,
decodePcGen = compressedGen,
compressedGen = compressedGen,
cmdToRspStageCount = (if(config.twoCycleCache) 2 else 1) + (if(relaxedPcCalculation) 1 else 0),
pcRegReusedForSecondStage = true,
injectorReadyCutGen = false,
prediction = prediction,
historyRamSizeLog2 = historyRamSizeLog2,

View file

@ -144,7 +144,8 @@ case class IBusSimpleBus(interfaceKeepData : Boolean) extends Bundle with IMaste
class IBusSimplePlugin(resetVector : BigInt,
catchAccessFault : Boolean = false,
relaxedPcCalculation : Boolean = false,
cmdForkOnSecondStage : Boolean = false,
cmdForkPersistence : Boolean = false,
prediction : BranchPrediction = NONE,
historyRamSizeLog2 : Int = 10,
keepPcPlus4 : Boolean = false,
@ -158,7 +159,8 @@ class IBusSimplePlugin(resetVector : BigInt,
keepPcPlus4 = keepPcPlus4,
decodePcGen = compressedGen,
compressedGen = compressedGen,
cmdToRspStageCount = busLatencyMin + (if(relaxedPcCalculation) 1 else 0),
cmdToRspStageCount = busLatencyMin + (if(cmdForkOnSecondStage) 1 else 0),
pcRegReusedForSecondStage = !(cmdForkOnSecondStage && cmdForkPersistence),
injectorReadyCutGen = false,
prediction = prediction,
historyRamSizeLog2 = historyRamSizeLog2,
@ -182,40 +184,44 @@ class IBusSimplePlugin(resetVector : BigInt,
import pipeline.config._
pipeline plug new FetchArea(pipeline) {
var cmd = Stream(IBusSimpleCmd())
iBus.cmd << (if(cmdForkPersistence && !cmdForkOnSecondStage) cmd.s2mPipe() else cmd)
//Avoid sending to many iBus cmd
val pendingCmd = Reg(UInt(log2Up(pendingMax + 1) bits)) init (0)
val pendingCmdNext = pendingCmd + iBus.cmd.fire.asUInt - iBus.rsp.fire.asUInt
val pendingCmdNext = pendingCmd + cmd.fire.asUInt - iBus.rsp.fire.asUInt
pendingCmd := pendingCmdNext
val cmd = /*if(relaxedPcCalculation) new Area {
//This implementation keep the iBus.cmd on the bus until it's executed, even if the pipeline is flushed
val cmdFork = if(!cmdForkPersistence || !cmdForkOnSecondStage) new Area {
//This implementation keep the cmd on the bus until it's executed or the the pipeline is flushed
def stage = iBusRsp.stages(if(cmdForkOnSecondStage) 1 else 0)
stage.halt setWhen(stage.input.valid && (!cmd.valid || !cmd.ready))
cmd.valid := stage.input.valid && stage.output.ready && pendingCmd =/= pendingMax
cmd.pc := stage.input.payload(31 downto 2) @@ "00"
} else new Area{
//This implementation keep the cmd on the bus until it's executed, even if the pipeline is flushed
def stage = iBusRsp.stages(1)
stage.halt setWhen(iBus.cmd.isStall)
val cmdKeep = RegInit(False) setWhen(iBus.cmd.valid) clearWhen(iBus.cmd.ready)
val cmdFired = RegInit(False) setWhen(iBus.cmd.fire) clearWhen(stage.input.ready)
iBus.cmd.valid := (stage.input.valid || cmdKeep) && pendingCmd =/= pendingMax && !cmdFired
iBus.cmd.pc := stage.input.payload(31 downto 2) @@ "00"
} else */new Area {
//This implementation keep the iBus.cmd on the bus until it's executed or the the pipeline is flushed (not "safe")
def stage = iBusRsp.stages(if(relaxedPcCalculation) 1 else 0)
stage.halt setWhen(stage.input.valid && (!iBus.cmd.valid || !iBus.cmd.ready))
iBus.cmd.valid := stage.input.valid && stage.output.ready && pendingCmd =/= pendingMax
iBus.cmd.pc := stage.input.payload(31 downto 2) @@ "00"
val pendingFull = pendingCmd === pendingMax
val cmdKeep = RegInit(False) setWhen(cmd.valid) clearWhen(cmd.ready)
val cmdFired = RegInit(False) setWhen(cmd.fire) clearWhen(stage.input.ready)
stage.halt setWhen(cmd.isStall || (pendingFull && !cmdFired))
cmd.valid := (stage.input.valid || cmdKeep) && !pendingFull && !cmdFired
cmd.pc := stage.input.payload(31 downto 2) @@ "00"
}
val rsp = new Area {
val rspJoin = new Area {
import iBusRsp._
//Manage flush for iBus transactions in flight
val discardCounter = Reg(UInt(log2Up(pendingMax + 1) bits)) init (0)
discardCounter := discardCounter - (iBus.rsp.fire && discardCounter =/= 0).asUInt
when(flush) {
// discardCounter := (if(relaxedPcCalculation) pendingCmd + iBus.cmd.valid.asUInt - iBus.rsp.fire.asUInt else pendingCmd - iBus.rsp.fire.asUInt)
discardCounter := (if(relaxedPcCalculation) pendingCmdNext else pendingCmd - iBus.rsp.fire.asUInt)
if(cmdForkOnSecondStage && cmdForkPersistence)
discardCounter := pendingCmd + cmd.valid.asUInt - iBus.rsp.fire.asUInt
else
discardCounter := (if(cmdForkOnSecondStage) pendingCmdNext else pendingCmd - iBus.rsp.fire.asUInt)
}
val rspBuffer = StreamFifoLowLatency(IBusSimpleRsp(), busLatencyMin)
val rspBuffer = StreamFifoLowLatency(IBusSimpleRsp(), busLatencyMin + (if(cmdForkOnSecondStage && cmdForkPersistence) 1 else 0))
rspBuffer.io.push << iBus.rsp.throwWhen(discardCounter =/= 0).toStream
rspBuffer.io.flush := flush

View file

@ -266,14 +266,15 @@ class IBusDimension extends VexRiscvDimension("IBus") {
val injectorStage = r.nextBoolean() || latency == 1
val prediction = random(r, List(NONE, STATIC, DYNAMIC, DYNAMIC_TARGET))
val catchAll = universes.contains(VexRiscvUniverse.CATCH_ALL)
val relaxedPcCalculation = r.nextBoolean()
val relaxedBusCmdValid =false // r.nextBoolean() && relaxedPcCalculation && prediction != DYNAMIC_TARGET
new VexRiscvPosition("Simple" + latency + (if(relaxedPcCalculation) "Relax" else "") + (if(relaxedBusCmdValid) "Valid" else "") + (if(injectorStage) "InjStage" else "") + (if(compressed) "Rvc" else "") + prediction.getClass.getTypeName().replace("$","")) with InstructionAnticipatedPosition{
val cmdForkOnSecondStage = r.nextBoolean()
val cmdForkPersistence = r.nextBoolean()
val relaxedBusCmdValid = false // r.nextBoolean() && relaxedPcCalculation && prediction != DYNAMIC_TARGET
new VexRiscvPosition("Simple" + latency + (if(cmdForkOnSecondStage) "S2" else "") + (if(cmdForkPersistence) "P" else "") + (if(relaxedBusCmdValid) "Valid" else "") + (if(injectorStage) "InjStage" else "") + (if(compressed) "Rvc" else "") + prediction.getClass.getTypeName().replace("$","")) with InstructionAnticipatedPosition{
override def testParam = "IBUS=SIMPLE" + (if(compressed) " COMPRESSED=yes" else "")
override def applyOn(config: VexRiscvConfig): Unit = config.plugins += new IBusSimplePlugin(
resetVector = 0x80000000l,
relaxedPcCalculation = relaxedPcCalculation,
relaxedBusCmdValid = relaxedBusCmdValid,
cmdForkOnSecondStage = cmdForkOnSecondStage,
cmdForkPersistence = cmdForkPersistence,
prediction = prediction,
catchAccessFault = catchAll,
compressedGen = compressed,
@ -522,8 +523,10 @@ class TestIndividualFeatures extends FunSuite {
// val seed = -2412372746600605141l
// val testId = Some(mutable.HashSet[Int](1,6,11,17,23,24))
// val seed = -7309275932954927463l
// val testId = Some(mutable.HashSet[Int](6,11,31,32,53,55,56,64,82))
// val testId = Some(mutable.HashSet[Int](31))
// val seed = 971825313472546699l
val rand = new Random(seed)