fpu fix cmd / commit race condition
This commit is contained in:
parent
636d53cf63
commit
4bdab667cc
|
@ -121,11 +121,12 @@ object TestsWorkspace {
|
|||
// cd buildroot-build/
|
||||
// make O=$PWD BR2_EXTERNAL=../buildroot-spinal-saxon -C ../buildroot saxon_regression_defconfig
|
||||
|
||||
//make clean all IBUS=CACHED IBUS_DATA_WIDTH=64 COMPRESSED=no DBUS=CACHED DBUS_LOAD_DATA_WIDTH=64 DBUS_STORE_DATA_WIDTH=64 LRSC=yes AMO=yes SUPERVISOR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes MUL=yes DIV=yes RVF=yes RVD=yes DEBUG_PLUGIN=no TRACE=yes REDO=1 DEBUG=ye WITH_USER_IO=no FLOW_INFO=no TRACE_START=565000000000ll SEED=45
|
||||
|
||||
//make clean all IBUS=CACHED IBUS_DATA_WIDTH=64 COMPRESSED=no DBUS=CACHED DBUS_LOAD_DATA_WIDTH=64 DBUS_STORE_DATA_WIDTH=64 LRSC=yes AMO=yes SUPERVISOR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes MUL=yes DIV=yes RVF=yes RVD=yes DEBUG_PLUGIN=no TRACE=yes REDO=100 DEBUG=ye WITH_USER_IO=no FLOW_INFO=no TRACE_START=5600000000000ll SEED=45 STOP_ON_ERROR=ye
|
||||
|
||||
// export IMAGES=/media/data/open/SaxonSoc/artyA7SmpUpdate/buildroot-regression/buildroot-build/images
|
||||
// make clean all IBUS=CACHED IBUS_DATA_WIDTH=64 COMPRESSED=no DBUS=CACHED DBUS_LOAD_DATA_WIDTH=64 DBUS_STORE_DATA_WIDTH=64 LRSC=yes AMO=yes SUPERVISOR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes MUL=yes DIV=yes RVF=yes RVD=yes DEBUG_PLUGIN=no LINUX_SOC_SMP=yes EMULATOR=$IMAGES/fw_jump.bin VMLINUX=$IMAGES/Image DTB=$IMAGES/linux.dtb RAMDISK=$IMAGES/rootfs.cpio TRACE=ye REDO=1 DEBUG=ye WITH_USER_IO=no
|
||||
// make clean all IBUS=CACHED IBUS_DATA_WIDTH=64 COMPRESSED=no DBUS=CACHED DBUS_LOAD_DATA_WIDTH=64 DBUS_STORE_DATA_WIDTH=64 LRSC=yes AMO=yes SUPERVISOR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes MUL=yes DIV=yes RVF=yes RVD=yes DEBUG_PLUGIN=no LINUX_SOC_SMP=yes EMULATOR=$IMAGES/fw_jump.bin VMLINUX=$IMAGES/Image DTB=$IMAGES/linux.dtb RAMDISK=$IMAGES/rootfs.cpio TRACE=yes REDO=1 DEBUG=ye WITH_USER_IO=no FLOW_INFO=yes TRACE_START=47000000000ll SEED=43
|
||||
// make clean all IBUS=CACHED IBUS_DATA_WIDTH=64 COMPRESSED=no DBUS=CACHED DBUS_LOAD_DATA_WIDTH=64 DBUS_STORE_DATA_WIDTH=64 LRSC=yes AMO=yes SUPERVISOR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes MUL=yes DIV=yes RVF=yes RVD=yes DEBUG_PLUGIN=no LINUX_SOC_SMP=yes EMULATOR=$IMAGES/fw_jump.bin VMLINUX=$IMAGES/Image DTB=$IMAGES/linux.dtb RAMDISK=$IMAGES/rootfs.cpio TRACE=yes REDO=1 DEBUG=ye WITH_USER_IO=no FLOW_INFO=yes TRACE_START=47000000000ll SEED=45
|
||||
//make clean all IBUS=CACHED IBUS_DATA_WIDTH=64 COMPRESSED=no DBUS=CACHED DBUS_LOAD_DATA_WIDTH=64 DBUS_STORE_DATA_WIDTH=64 LRSC=yes AMO=yes SUPERVISOR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes MUL=yes DIV=yes RVF=yes RVD=yes DEBUG_PLUGIN=no LINUX_SOC_SMP=yes EMULATOR=$IMAGES/fw_jump.bin VMLINUX=$IMAGES/Image DTB=$IMAGES/linux.dtb RAMDISK=$IMAGES/rootfs.cpio TRACE=yes REDO=1 DEBUG=ye WITH_USER_IO=no FLOW_INFO=yes TRACE_START=565000000ll SEED=45
|
||||
// make clean all IBUS=CACHED IBUS_DATA_WIDTH=64 COMPRESSED=no DBUS=CACHED DBUS_LOAD_DATA_WIDTH=64 DBUS_STORE_DATA_WIDTH=64 LRSC=yes AMO=yes SUPERVISOR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes MUL=yes DIV=yes RVF=yes RVD=yes DEBUG_PLUGIN=no LINUX_SOC_SMP=yes EMULATOR=$IMAGES/fw_jump.bin VMLINUX=$IMAGES/Image DTB=$IMAGES/linux.dtb RAMDISK=$IMAGES/rootfs.cpio TRACE=yes REDO=1 DEBUG=ye WITH_USER_IO=no FLOW_INFO=no TRACE_START=565000000000ll SEED=45
|
||||
val config = VexRiscvSmpClusterGen.vexRiscvConfig(
|
||||
hartId = 0,
|
||||
ioRange = _ (31 downto 28) === 0xF,
|
||||
|
@ -139,7 +140,8 @@ object TestsWorkspace {
|
|||
dCacheWays = 2,
|
||||
withFloat = true,
|
||||
withDouble = true,
|
||||
externalFpu = false
|
||||
externalFpu = false,
|
||||
simHalt = true
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -167,7 +167,8 @@ object VexRiscvSmpClusterGen {
|
|||
withSupervisor : Boolean = true,
|
||||
withFloat : Boolean = false,
|
||||
withDouble : Boolean = false,
|
||||
externalFpu : Boolean = true
|
||||
externalFpu : Boolean = true,
|
||||
simHalt : Boolean = false
|
||||
) = {
|
||||
assert(iCacheSize/iCacheWays <= 4096, "Instruction cache ways can't be bigger than 4096 bytes")
|
||||
assert(dCacheSize/dCacheWays <= 4096, "Data cache ways can't be bigger than 4096 bytes")
|
||||
|
@ -280,6 +281,7 @@ object VexRiscvSmpClusterGen {
|
|||
|
||||
if(withFloat) config.plugins += new FpuPlugin(
|
||||
externalFpu = externalFpu,
|
||||
simHalt = simHalt,
|
||||
p = FpuParameter(withDouble = withDouble)
|
||||
)
|
||||
config
|
||||
|
|
|
@ -179,25 +179,10 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
|||
})
|
||||
}
|
||||
|
||||
// val completion = for(source <- 0 until portCount) yield new Area{
|
||||
// def port = io.port(source)
|
||||
// port.completion.flag.NV := False
|
||||
// port.completion.flag.DZ := False
|
||||
// port.completion.flag.OF := False
|
||||
// port.completion.flag.UF := False
|
||||
// port.completion.flag.NX := False
|
||||
//
|
||||
// val increments = ArrayBuffer[Bool]()
|
||||
//
|
||||
// afterElaboration{
|
||||
// port.completion.count := increments.map(_.asUInt.resize(log2Up(increments.size + 1))).reduceBalancedTree(_ + _)
|
||||
// }
|
||||
// }
|
||||
|
||||
val commitFork = new Area{
|
||||
val load, commit = Vec(Stream(FpuCommit(p)), portCount)
|
||||
for(i <- 0 until portCount){
|
||||
val fork = new StreamFork(FpuCommit(p), 2)
|
||||
val fork = new StreamFork(FpuCommit(p), 2, synchronous = true)
|
||||
fork.io.input << io.port(i).commit
|
||||
fork.io.outputs(0) >> load(i)
|
||||
fork.io.outputs(1).pipelined(m2s = true, s2m = true) >> commit(i) //Pipelining here is light, as it only uses the flags of the payload
|
||||
|
@ -214,8 +199,9 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
|||
}
|
||||
|
||||
class CommitArea(source : Int) extends Area{
|
||||
val pending = new Tracker(4)
|
||||
val add, mul, div, sqrt, short = new Tracker(4)
|
||||
val input = commitFork.commit(source).haltWhen(List(add, mul, div, sqrt, short).map(_.full).orR).toFlow
|
||||
val input = commitFork.commit(source).haltWhen(List(add, mul, div, sqrt, short).map(_.full).orR || !pending.notEmpty).toFlow
|
||||
|
||||
when(input.fire){
|
||||
add.inc setWhen(List(FpuOpcode.ADD).map(input.opcode === _).orR)
|
||||
|
@ -224,6 +210,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
|||
sqrt.inc setWhen(List(FpuOpcode.SQRT).map(input.opcode === _).orR)
|
||||
short.inc setWhen(List(FpuOpcode.SGNJ, FpuOpcode.MIN_MAX, FpuOpcode.FCVT_X_X).map(input.opcode === _).orR)
|
||||
rf.scoreboards(source).writes(input.rd) := input.write
|
||||
pending.dec := True
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -237,7 +224,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
|||
|
||||
val scheduler = for(portId <- 0 until portCount;
|
||||
scoreboard = rf.scoreboards(portId)) yield new Area{
|
||||
val input = io.port(portId).cmd.combStage()
|
||||
val input = io.port(portId).cmd.pipelined(s2m = true)
|
||||
val useRs1, useRs2, useRs3, useRd = False
|
||||
switch(input.opcode){
|
||||
is(p.Opcode.LOAD) { useRd := True }
|
||||
|
@ -265,7 +252,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
|||
val rfBusy = (rfHits, rfTargets).zipped.map(_ ^ _)
|
||||
|
||||
val hits = (0 to 3).map(id => uses(id) && rfBusy(id))
|
||||
val hazard = hits.orR || !rf.init.done
|
||||
val hazard = hits.orR || !rf.init.done || commitLogic(portId).pending.full
|
||||
val output = input.haltWhen(hazard)
|
||||
when(input.valid && rf.init.done){
|
||||
scoreboard.targetWrite.address := input.rd
|
||||
|
@ -273,6 +260,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
|||
}
|
||||
when(output.fire && useRd){
|
||||
scoreboard.targetWrite.valid := True
|
||||
commitLogic(portId).pending.inc := True
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -287,7 +275,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
|||
}
|
||||
|
||||
val read = new Area{
|
||||
val s0 = cmdArbiter.output.pipelined(m2s = true, s2m = true) //TODO may need to remove m2s for store latency
|
||||
val s0 = cmdArbiter.output.pipelined() //TODO may need to remove m2s for store latency
|
||||
val s1 = s0.m2sPipe()
|
||||
val output = s1.swapPayload(RfReadOutput())
|
||||
val rs1Entry = rf.ram.readSync(s0.source @@ s0.rs1,enable = !output.isStall)
|
||||
|
@ -982,7 +970,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
|||
|
||||
when(exp(exp.getWidth-3, 3 bits) >= 5) { output.exponent(p.internalExponentSize-2, 2 bits) := 3 }
|
||||
|
||||
// val flag = io.port(input.source).completion.flag
|
||||
when(forceNan) {
|
||||
output.setNanQuiet
|
||||
NV setWhen(infinitynan || input.rs1.isNanSignaling || input.rs2.isNanSignaling)
|
||||
|
@ -1479,8 +1466,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
|||
if (p.withDouble) output.format := input.format
|
||||
output.scrap := (mantissa(1) | mantissa(0) | roundingScrap)
|
||||
|
||||
|
||||
// val flag = io.port(input.source).completion.flag
|
||||
output.NV := infinityNan || input.rs1.isNanSignaling || input.rs2.isNanSignaling
|
||||
output.DZ := False
|
||||
when(forceNan) {
|
||||
|
|
|
@ -10,6 +10,7 @@ import vexriscv.ip.fpu._
|
|||
import scala.collection.mutable.ArrayBuffer
|
||||
|
||||
class FpuPlugin(externalFpu : Boolean = false,
|
||||
simHalt : Boolean = false,
|
||||
p : FpuParameter) extends Plugin[VexRiscv] with VexRiscvRegressionArg {
|
||||
|
||||
object FPU_ENABLE extends Stageable(Bool())
|
||||
|
@ -222,10 +223,20 @@ class FpuPlugin(externalFpu : Boolean = false,
|
|||
|
||||
val internal = (!externalFpu).generate (pipeline plug new Area{
|
||||
val fpu = FpuCore(1, p)
|
||||
if(simHalt) {
|
||||
val cmdHalt = in(Bool).setName("fpuCmdHalt").addAttribute(Verilator.public)
|
||||
val commitHalt = in(Bool).setName("fpuCommitHalt").addAttribute(Verilator.public)
|
||||
val rspHalt = in(Bool).setName("fpuRspHalt").addAttribute(Verilator.public)
|
||||
fpu.io.port(0).cmd << port.cmd.haltWhen(cmdHalt)
|
||||
fpu.io.port(0).commit << port.commit.haltWhen(commitHalt)
|
||||
fpu.io.port(0).rsp.haltWhen(rspHalt) >> port.rsp
|
||||
fpu.io.port(0).completion <> port.completion
|
||||
} else {
|
||||
fpu.io.port(0).cmd << port.cmd
|
||||
fpu.io.port(0).commit << port.commit
|
||||
fpu.io.port(0).rsp >> port.rsp
|
||||
fpu.io.port(0).completion <> port.completion
|
||||
}
|
||||
})
|
||||
|
||||
|
||||
|
|
|
@ -1848,6 +1848,11 @@ public:
|
|||
instanceCycles += 1;
|
||||
|
||||
for(SimElement* simElement : simElements) simElement->postCycle();
|
||||
#ifdef RVF
|
||||
top->fpuCmdHalt = VL_RANDOM_I(1);
|
||||
top->fpuCommitHalt = VL_RANDOM_I(1);
|
||||
top->fpuRspHalt = VL_RANDOM_I(1);
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
@ -3815,10 +3820,10 @@ string riscvTestMemory[] = {
|
|||
|
||||
|
||||
string riscvTestFloat[] = {
|
||||
"rv32uf-p-fmadd",
|
||||
"rv32uf-p-fadd",
|
||||
"rv32uf-p-fcmp",
|
||||
"rv32uf-p-fcvt_w",
|
||||
"rv32uf-p-fmadd",
|
||||
"rv32uf-p-ldst",
|
||||
"rv32uf-p-recoding",
|
||||
"rv32uf-p-fclass",
|
||||
|
@ -3830,9 +3835,9 @@ string riscvTestFloat[] = {
|
|||
|
||||
|
||||
string riscvTestDouble[] = {
|
||||
"rv32ud-p-fmadd",
|
||||
"rv32ud-p-fadd",
|
||||
"rv32ud-p-fcvt",
|
||||
"rv32ud-p-fmadd",
|
||||
"rv32ud-p-recoding",
|
||||
"rv32ud-p-fclass",
|
||||
"rv32ud-p-fcvt_w",
|
||||
|
|
Loading…
Reference in New Issue