Add Fetcher.predictionBuffer option to pipeline BRANCH_TARGET for a higher FMax (about 1 ns critical path gain on Arty7 => 5 ns)

This commit is contained in:
Charles Papon 2020-02-23 23:18:27 +01:00
parent 67d2071a32
commit fad09e805f
3 changed files with 46 additions and 20 deletions

View File

@ -22,7 +22,8 @@ abstract class IBusFetcherImpl(val resetVector : BigInt,
val historyRamSizeLog2 : Int, val historyRamSizeLog2 : Int,
val injectorStage : Boolean, val injectorStage : Boolean,
val relaxPredictorAddress : Boolean, val relaxPredictorAddress : Boolean,
val fetchRedoGen : Boolean) extends Plugin[VexRiscv] with JumpService with IBusFetcher{ val fetchRedoGen : Boolean,
val predictionBuffer : Boolean = true) extends Plugin[VexRiscv] with JumpService with IBusFetcher{
var prefetchExceptionPort : Flow[ExceptionCause] = null var prefetchExceptionPort : Flow[ExceptionCause] = null
var decodePrediction : DecodePredictionBus = null var decodePrediction : DecodePredictionBus = null
var fetchPrediction : FetchPredictionBus = null var fetchPrediction : FetchPredictionBus = null
@ -121,10 +122,12 @@ abstract class IBusFetcherImpl(val resetVector : BigInt,
//PC calculation without Jump //PC calculation without Jump
val output = Stream(UInt(32 bits)) val output = Stream(UInt(32 bits))
val pcReg = Reg(UInt(32 bits)) init(if(resetVector != null) resetVector else externalResetVector) addAttribute(Verilator.public) val pcReg = Reg(UInt(32 bits)) init(if(resetVector != null) resetVector else externalResetVector) addAttribute(Verilator.public)
val corrected = False val correction = False
val correctionReg = RegInit(False) setWhen(correction) clearWhen(output.fire)
val corrected = correction || correctionReg
val pcRegPropagate = False val pcRegPropagate = False
val booted = RegNext(True) init (False) val booted = RegNext(True) init (False)
val inc = RegInit(False) clearWhen(corrected || pcRegPropagate) setWhen(output.fire) clearWhen(!output.valid && output.ready) val inc = RegInit(False) clearWhen(correction || pcRegPropagate) setWhen(output.fire) clearWhen(!output.valid && output.ready)
val pc = pcReg + (inc ## B"00").asUInt val pc = pcReg + (inc ## B"00").asUInt
val predictionPcLoad = ifGen(prediction == DYNAMIC_TARGET) (Flow(UInt(32 bits))) val predictionPcLoad = ifGen(prediction == DYNAMIC_TARGET) (Flow(UInt(32 bits)))
val redo = (fetchRedoGen || prediction == DYNAMIC_TARGET) generate Flow(UInt(32 bits)) val redo = (fetchRedoGen || prediction == DYNAMIC_TARGET) generate Flow(UInt(32 bits))
@ -136,22 +139,22 @@ abstract class IBusFetcherImpl(val resetVector : BigInt,
if(predictionPcLoad != null) { if(predictionPcLoad != null) {
when(predictionPcLoad.valid) { when(predictionPcLoad.valid) {
corrected := True correction := True
pc := predictionPcLoad.payload pc := predictionPcLoad.payload
} }
} }
if(redo != null) when(redo.valid){ if(redo != null) when(redo.valid){
corrected := True correction := True
pc := redo.payload pc := redo.payload
flushed := True flushed := True
} }
when(jump.pcLoad.valid) { when(jump.pcLoad.valid) {
corrected := True correction := True
pc := jump.pcLoad.payload pc := jump.pcLoad.payload
flushed := True flushed := True
} }
when(booted && (output.ready || corrected || pcRegPropagate)){ when(booted && (output.ready || correction || pcRegPropagate)){
pcReg := pc pcReg := pc
} }
@ -506,17 +509,36 @@ abstract class IBusFetcherImpl(val resetVector : BigInt,
case class BranchPredictorLine() extends Bundle{ case class BranchPredictorLine() extends Bundle{
val source = Bits(30 - historyRamSizeLog2 bits) val source = Bits(30 - historyRamSizeLog2 bits)
val branchWish = UInt(2 bits) val branchWish = UInt(2 bits)
val target = UInt(32 bits)
val last2Bytes = ifGen(compressedGen)(Bool) val last2Bytes = ifGen(compressedGen)(Bool)
val target = UInt(32 bits)
} }
val history = Mem(BranchPredictorLine(), 1 << historyRamSizeLog2) val history = Mem(BranchPredictorLine(), 1 << historyRamSizeLog2)
val historyWrite = history.writePort val historyWriteDelayPatched = history.writePort
val historyWrite = cloneOf(historyWriteDelayPatched)
historyWriteDelayPatched.valid := historyWrite.valid
historyWriteDelayPatched.address := (if(predictionBuffer) historyWrite.address - 1 else historyWrite.address)
historyWriteDelayPatched.data := historyWrite.data
val writeLast = RegNextWhen(historyWriteDelayPatched, iBusRsp.stages(0).output.ready)
//Avoid write to read hazard //Avoid write to read hazard
val historyWriteLast = RegNextWhen(historyWrite, iBusRsp.stages(0).output.ready) val buffer = predictionBuffer generate new Area{
val hazard = historyWriteLast.valid && historyWriteLast.address === (iBusRsp.stages(1).input.payload >> 2).resized val line = history.readSync((iBusRsp.stages(0).input.payload >> 2).resized, iBusRsp.stages(0).output.ready)
val line = history.readSync((iBusRsp.stages(0).input.payload >> 2).resized, iBusRsp.stages(0).output.ready) val pcCorrected = RegNextWhen(fetchPc.corrected, iBusRsp.stages(0).input.ready)
val hazard = (writeLast.valid && writeLast.address === (iBusRsp.stages(1).input.payload >> 2).resized)
}
val (line, hazard) = predictionBuffer match {
case true =>
(RegNextWhen(buffer.line, iBusRsp.stages(0).output.ready),
RegNextWhen(buffer.hazard, iBusRsp.stages(0).output.ready) || buffer.pcCorrected)
case false =>
(history.readSync((iBusRsp.stages(0).input.payload >> 2).resized,
iBusRsp.stages(0).output.ready), writeLast.valid && writeLast.address === (iBusRsp.stages(1).input.payload >> 2).resized)
}
val hit = line.source === (iBusRsp.stages(1).input.payload.asBits >> 2 + historyRamSizeLog2) val hit = line.source === (iBusRsp.stages(1).input.payload.asBits >> 2 + historyRamSizeLog2)
if(compressedGen) hit clearWhen(!line.last2Bytes && iBusRsp.stages(1).input.payload(1)) if(compressedGen) hit clearWhen(!line.last2Bytes && iBusRsp.stages(1).input.payload(1))
@ -534,7 +556,7 @@ abstract class IBusFetcherImpl(val resetVector : BigInt,
fetchContext.hit := hit fetchContext.hit := hit
fetchContext.line := line fetchContext.line := line
val (decompressorContext, decompressorContextOutput, injectorContext) = stage1ToInjectorPipe(fetchContext) val (iBusRspContext, iBusRspContextOutput, injectorContext) = stage1ToInjectorPipe(fetchContext)
object PREDICTION_CONTEXT extends Stageable(PredictionResult()) object PREDICTION_CONTEXT extends Stageable(PredictionResult())
pipeline.decode.insert(PREDICTION_CONTEXT) := injectorContext pipeline.decode.insert(PREDICTION_CONTEXT) := injectorContext
@ -567,7 +589,7 @@ abstract class IBusFetcherImpl(val resetVector : BigInt,
historyWrite.valid clearWhen(branchContext.hazard || !branchStage.arbitration.isFiring) historyWrite.valid clearWhen(branchContext.hazard || !branchStage.arbitration.isFiring)
val compressor = compressedGen generate new Area{ val compressor = compressedGen generate new Area{
val predictionBranch = decompressorContext.hit && !decompressorContext.hazard && decompressorContext.line.branchWish(1) val predictionBranch = iBusRspContext.hit && !iBusRspContext.hazard && iBusRspContext.line.branchWish(1)
val unalignedWordIssue = iBusRsp.output.valid && predictionBranch && decompressor.throw2Bytes && !decompressor.isInputHighRvc val unalignedWordIssue = iBusRsp.output.valid && predictionBranch && decompressor.throw2Bytes && !decompressor.isInputHighRvc
when(unalignedWordIssue){ when(unalignedWordIssue){
@ -579,13 +601,13 @@ abstract class IBusFetcherImpl(val resetVector : BigInt,
} }
//Do not trigger prediction hit when it is one for the upper RVC word and we aren't there yet //Do not trigger prediction hit when it is one for the upper RVC word and we aren't there yet
decompressorContextOutput.hit clearWhen(decompressorContext.line.last2Bytes && (decompressor.bufferValid || (!decompressor.throw2Bytes && decompressor.isInputLowRvc))) iBusRspContextOutput.hit clearWhen(iBusRspContext.line.last2Bytes && (decompressor.bufferValid || (!decompressor.throw2Bytes && decompressor.isInputLowRvc)))
decodePc.predictionPcLoad.valid := injectorContext.line.branchWish.msb && injectorContext.hit && !injectorContext.hazard && injector.decodeInput.fire decodePc.predictionPcLoad.valid := injectorContext.line.branchWish.msb && injectorContext.hit && !injectorContext.hazard && injector.decodeInput.fire
decodePc.predictionPcLoad.payload := injectorContext.line.target decodePc.predictionPcLoad.payload := injectorContext.line.target
//Clean the RVC buffer when a prediction was made //Clean the RVC buffer when a prediction was made
when(decompressorContext.line.branchWish.msb && decompressorContextOutput.hit && !decompressorContext.hazard && decompressor.output.fire){ when(iBusRspContext.line.branchWish.msb && iBusRspContextOutput.hit && !iBusRspContext.hazard && decompressor.output.fire){
decompressor.bufferValid := False decompressor.bufferValid := False
decompressor.throw2BytesReg := False decompressor.throw2BytesReg := False
decompressor.input.ready := True //Drop the remaining byte if any decompressor.input.ready := True //Drop the remaining byte if any

View File

@ -35,7 +35,8 @@ class IBusCachedPlugin(resetVector : BigInt = 0x80000000l,
memoryTranslatorPortConfig : Any = null, memoryTranslatorPortConfig : Any = null,
injectorStage : Boolean = false, injectorStage : Boolean = false,
withoutInjectorStage : Boolean = false, withoutInjectorStage : Boolean = false,
relaxPredictorAddress : Boolean = true) extends IBusFetcherImpl( relaxPredictorAddress : Boolean = true,
predictionBuffer : Boolean = true) extends IBusFetcherImpl(
resetVector = resetVector, resetVector = resetVector,
keepPcPlus4 = keepPcPlus4, keepPcPlus4 = keepPcPlus4,
decodePcGen = compressedGen, decodePcGen = compressedGen,
@ -47,7 +48,8 @@ class IBusCachedPlugin(resetVector : BigInt = 0x80000000l,
historyRamSizeLog2 = historyRamSizeLog2, historyRamSizeLog2 = historyRamSizeLog2,
injectorStage = (!config.twoCycleCache && !withoutInjectorStage) || injectorStage, injectorStage = (!config.twoCycleCache && !withoutInjectorStage) || injectorStage,
relaxPredictorAddress = relaxPredictorAddress, relaxPredictorAddress = relaxPredictorAddress,
fetchRedoGen = true){ fetchRedoGen = true,
predictionBuffer = predictionBuffer){
import config._ import config._
assert(isPow2(cacheSize)) assert(isPow2(cacheSize))

View File

@ -233,7 +233,8 @@ class IBusSimplePlugin( resetVector : BigInt,
val rspHoldValue : Boolean = false, val rspHoldValue : Boolean = false,
val singleInstructionPipeline : Boolean = false, val singleInstructionPipeline : Boolean = false,
val memoryTranslatorPortConfig : Any = null, val memoryTranslatorPortConfig : Any = null,
relaxPredictorAddress : Boolean = true relaxPredictorAddress : Boolean = true,
predictionBuffer : Boolean = true
) extends IBusFetcherImpl( ) extends IBusFetcherImpl(
resetVector = resetVector, resetVector = resetVector,
keepPcPlus4 = keepPcPlus4, keepPcPlus4 = keepPcPlus4,
@ -246,7 +247,8 @@ class IBusSimplePlugin( resetVector : BigInt,
historyRamSizeLog2 = historyRamSizeLog2, historyRamSizeLog2 = historyRamSizeLog2,
injectorStage = injectorStage, injectorStage = injectorStage,
relaxPredictorAddress = relaxPredictorAddress, relaxPredictorAddress = relaxPredictorAddress,
fetchRedoGen = memoryTranslatorPortConfig != null){ fetchRedoGen = memoryTranslatorPortConfig != null,
predictionBuffer = predictionBuffer){
var iBus : IBusSimpleBus = null var iBus : IBusSimpleBus = null
var decodeExceptionPort : Flow[ExceptionCause] = null var decodeExceptionPort : Flow[ExceptionCause] = null