fpu fix cmd / commit race condition
This commit is contained in:
parent
636d53cf63
commit
4bdab667cc
|
@ -121,11 +121,12 @@ object TestsWorkspace {
|
||||||
// cd buildroot-build/
|
// cd buildroot-build/
|
||||||
// make O=$PWD BR2_EXTERNAL=../buildroot-spinal-saxon -C ../buildroot saxon_regression_defconfig
|
// make O=$PWD BR2_EXTERNAL=../buildroot-spinal-saxon -C ../buildroot saxon_regression_defconfig
|
||||||
|
|
||||||
|
//make clean all IBUS=CACHED IBUS_DATA_WIDTH=64 COMPRESSED=no DBUS=CACHED DBUS_LOAD_DATA_WIDTH=64 DBUS_STORE_DATA_WIDTH=64 LRSC=yes AMO=yes SUPERVISOR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes MUL=yes DIV=yes RVF=yes RVD=yes DEBUG_PLUGIN=no TRACE=yes REDO=1 DEBUG=ye WITH_USER_IO=no FLOW_INFO=no TRACE_START=565000000000ll SEED=45
|
||||||
|
|
||||||
|
//make clean all IBUS=CACHED IBUS_DATA_WIDTH=64 COMPRESSED=no DBUS=CACHED DBUS_LOAD_DATA_WIDTH=64 DBUS_STORE_DATA_WIDTH=64 LRSC=yes AMO=yes SUPERVISOR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes MUL=yes DIV=yes RVF=yes RVD=yes DEBUG_PLUGIN=no TRACE=yes REDO=100 DEBUG=ye WITH_USER_IO=no FLOW_INFO=no TRACE_START=5600000000000ll SEED=45 STOP_ON_ERROR=ye
|
||||||
|
|
||||||
// export IMAGES=/media/data/open/SaxonSoc/artyA7SmpUpdate/buildroot-regression/buildroot-build/images
|
// export IMAGES=/media/data/open/SaxonSoc/artyA7SmpUpdate/buildroot-regression/buildroot-build/images
|
||||||
// make clean all IBUS=CACHED IBUS_DATA_WIDTH=64 COMPRESSED=no DBUS=CACHED DBUS_LOAD_DATA_WIDTH=64 DBUS_STORE_DATA_WIDTH=64 LRSC=yes AMO=yes SUPERVISOR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes MUL=yes DIV=yes RVF=yes RVD=yes DEBUG_PLUGIN=no LINUX_SOC_SMP=yes EMULATOR=$IMAGES/fw_jump.bin VMLINUX=$IMAGES/Image DTB=$IMAGES/linux.dtb RAMDISK=$IMAGES/rootfs.cpio TRACE=ye REDO=1 DEBUG=ye WITH_USER_IO=no
|
// make clean all IBUS=CACHED IBUS_DATA_WIDTH=64 COMPRESSED=no DBUS=CACHED DBUS_LOAD_DATA_WIDTH=64 DBUS_STORE_DATA_WIDTH=64 LRSC=yes AMO=yes SUPERVISOR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes MUL=yes DIV=yes RVF=yes RVD=yes DEBUG_PLUGIN=no LINUX_SOC_SMP=yes EMULATOR=$IMAGES/fw_jump.bin VMLINUX=$IMAGES/Image DTB=$IMAGES/linux.dtb RAMDISK=$IMAGES/rootfs.cpio TRACE=yes REDO=1 DEBUG=ye WITH_USER_IO=no FLOW_INFO=no TRACE_START=565000000000ll SEED=45
|
||||||
// make clean all IBUS=CACHED IBUS_DATA_WIDTH=64 COMPRESSED=no DBUS=CACHED DBUS_LOAD_DATA_WIDTH=64 DBUS_STORE_DATA_WIDTH=64 LRSC=yes AMO=yes SUPERVISOR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes MUL=yes DIV=yes RVF=yes RVD=yes DEBUG_PLUGIN=no LINUX_SOC_SMP=yes EMULATOR=$IMAGES/fw_jump.bin VMLINUX=$IMAGES/Image DTB=$IMAGES/linux.dtb RAMDISK=$IMAGES/rootfs.cpio TRACE=yes REDO=1 DEBUG=ye WITH_USER_IO=no FLOW_INFO=yes TRACE_START=47000000000ll SEED=43
|
|
||||||
// make clean all IBUS=CACHED IBUS_DATA_WIDTH=64 COMPRESSED=no DBUS=CACHED DBUS_LOAD_DATA_WIDTH=64 DBUS_STORE_DATA_WIDTH=64 LRSC=yes AMO=yes SUPERVISOR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes MUL=yes DIV=yes RVF=yes RVD=yes DEBUG_PLUGIN=no LINUX_SOC_SMP=yes EMULATOR=$IMAGES/fw_jump.bin VMLINUX=$IMAGES/Image DTB=$IMAGES/linux.dtb RAMDISK=$IMAGES/rootfs.cpio TRACE=yes REDO=1 DEBUG=ye WITH_USER_IO=no FLOW_INFO=yes TRACE_START=47000000000ll SEED=45
|
|
||||||
//make clean all IBUS=CACHED IBUS_DATA_WIDTH=64 COMPRESSED=no DBUS=CACHED DBUS_LOAD_DATA_WIDTH=64 DBUS_STORE_DATA_WIDTH=64 LRSC=yes AMO=yes SUPERVISOR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes MUL=yes DIV=yes RVF=yes RVD=yes DEBUG_PLUGIN=no LINUX_SOC_SMP=yes EMULATOR=$IMAGES/fw_jump.bin VMLINUX=$IMAGES/Image DTB=$IMAGES/linux.dtb RAMDISK=$IMAGES/rootfs.cpio TRACE=yes REDO=1 DEBUG=ye WITH_USER_IO=no FLOW_INFO=yes TRACE_START=565000000ll SEED=45
|
|
||||||
val config = VexRiscvSmpClusterGen.vexRiscvConfig(
|
val config = VexRiscvSmpClusterGen.vexRiscvConfig(
|
||||||
hartId = 0,
|
hartId = 0,
|
||||||
ioRange = _ (31 downto 28) === 0xF,
|
ioRange = _ (31 downto 28) === 0xF,
|
||||||
|
@ -139,7 +140,8 @@ object TestsWorkspace {
|
||||||
dCacheWays = 2,
|
dCacheWays = 2,
|
||||||
withFloat = true,
|
withFloat = true,
|
||||||
withDouble = true,
|
withDouble = true,
|
||||||
externalFpu = false
|
externalFpu = false,
|
||||||
|
simHalt = true
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -167,7 +167,8 @@ object VexRiscvSmpClusterGen {
|
||||||
withSupervisor : Boolean = true,
|
withSupervisor : Boolean = true,
|
||||||
withFloat : Boolean = false,
|
withFloat : Boolean = false,
|
||||||
withDouble : Boolean = false,
|
withDouble : Boolean = false,
|
||||||
externalFpu : Boolean = true
|
externalFpu : Boolean = true,
|
||||||
|
simHalt : Boolean = false
|
||||||
) = {
|
) = {
|
||||||
assert(iCacheSize/iCacheWays <= 4096, "Instruction cache ways can't be bigger than 4096 bytes")
|
assert(iCacheSize/iCacheWays <= 4096, "Instruction cache ways can't be bigger than 4096 bytes")
|
||||||
assert(dCacheSize/dCacheWays <= 4096, "Data cache ways can't be bigger than 4096 bytes")
|
assert(dCacheSize/dCacheWays <= 4096, "Data cache ways can't be bigger than 4096 bytes")
|
||||||
|
@ -280,6 +281,7 @@ object VexRiscvSmpClusterGen {
|
||||||
|
|
||||||
if(withFloat) config.plugins += new FpuPlugin(
|
if(withFloat) config.plugins += new FpuPlugin(
|
||||||
externalFpu = externalFpu,
|
externalFpu = externalFpu,
|
||||||
|
simHalt = simHalt,
|
||||||
p = FpuParameter(withDouble = withDouble)
|
p = FpuParameter(withDouble = withDouble)
|
||||||
)
|
)
|
||||||
config
|
config
|
||||||
|
|
|
@ -179,25 +179,10 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// val completion = for(source <- 0 until portCount) yield new Area{
|
|
||||||
// def port = io.port(source)
|
|
||||||
// port.completion.flag.NV := False
|
|
||||||
// port.completion.flag.DZ := False
|
|
||||||
// port.completion.flag.OF := False
|
|
||||||
// port.completion.flag.UF := False
|
|
||||||
// port.completion.flag.NX := False
|
|
||||||
//
|
|
||||||
// val increments = ArrayBuffer[Bool]()
|
|
||||||
//
|
|
||||||
// afterElaboration{
|
|
||||||
// port.completion.count := increments.map(_.asUInt.resize(log2Up(increments.size + 1))).reduceBalancedTree(_ + _)
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
val commitFork = new Area{
|
val commitFork = new Area{
|
||||||
val load, commit = Vec(Stream(FpuCommit(p)), portCount)
|
val load, commit = Vec(Stream(FpuCommit(p)), portCount)
|
||||||
for(i <- 0 until portCount){
|
for(i <- 0 until portCount){
|
||||||
val fork = new StreamFork(FpuCommit(p), 2)
|
val fork = new StreamFork(FpuCommit(p), 2, synchronous = true)
|
||||||
fork.io.input << io.port(i).commit
|
fork.io.input << io.port(i).commit
|
||||||
fork.io.outputs(0) >> load(i)
|
fork.io.outputs(0) >> load(i)
|
||||||
fork.io.outputs(1).pipelined(m2s = true, s2m = true) >> commit(i) //Pipelining here is light, as it only uses the flags of the payload
|
fork.io.outputs(1).pipelined(m2s = true, s2m = true) >> commit(i) //Pipelining here is light, as it only uses the flags of the payload
|
||||||
|
@ -214,8 +199,9 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
}
|
}
|
||||||
|
|
||||||
class CommitArea(source : Int) extends Area{
|
class CommitArea(source : Int) extends Area{
|
||||||
|
val pending = new Tracker(4)
|
||||||
val add, mul, div, sqrt, short = new Tracker(4)
|
val add, mul, div, sqrt, short = new Tracker(4)
|
||||||
val input = commitFork.commit(source).haltWhen(List(add, mul, div, sqrt, short).map(_.full).orR).toFlow
|
val input = commitFork.commit(source).haltWhen(List(add, mul, div, sqrt, short).map(_.full).orR || !pending.notEmpty).toFlow
|
||||||
|
|
||||||
when(input.fire){
|
when(input.fire){
|
||||||
add.inc setWhen(List(FpuOpcode.ADD).map(input.opcode === _).orR)
|
add.inc setWhen(List(FpuOpcode.ADD).map(input.opcode === _).orR)
|
||||||
|
@ -224,6 +210,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
sqrt.inc setWhen(List(FpuOpcode.SQRT).map(input.opcode === _).orR)
|
sqrt.inc setWhen(List(FpuOpcode.SQRT).map(input.opcode === _).orR)
|
||||||
short.inc setWhen(List(FpuOpcode.SGNJ, FpuOpcode.MIN_MAX, FpuOpcode.FCVT_X_X).map(input.opcode === _).orR)
|
short.inc setWhen(List(FpuOpcode.SGNJ, FpuOpcode.MIN_MAX, FpuOpcode.FCVT_X_X).map(input.opcode === _).orR)
|
||||||
rf.scoreboards(source).writes(input.rd) := input.write
|
rf.scoreboards(source).writes(input.rd) := input.write
|
||||||
|
pending.dec := True
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -237,7 +224,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
|
|
||||||
val scheduler = for(portId <- 0 until portCount;
|
val scheduler = for(portId <- 0 until portCount;
|
||||||
scoreboard = rf.scoreboards(portId)) yield new Area{
|
scoreboard = rf.scoreboards(portId)) yield new Area{
|
||||||
val input = io.port(portId).cmd.combStage()
|
val input = io.port(portId).cmd.pipelined(s2m = true)
|
||||||
val useRs1, useRs2, useRs3, useRd = False
|
val useRs1, useRs2, useRs3, useRd = False
|
||||||
switch(input.opcode){
|
switch(input.opcode){
|
||||||
is(p.Opcode.LOAD) { useRd := True }
|
is(p.Opcode.LOAD) { useRd := True }
|
||||||
|
@ -265,7 +252,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val rfBusy = (rfHits, rfTargets).zipped.map(_ ^ _)
|
val rfBusy = (rfHits, rfTargets).zipped.map(_ ^ _)
|
||||||
|
|
||||||
val hits = (0 to 3).map(id => uses(id) && rfBusy(id))
|
val hits = (0 to 3).map(id => uses(id) && rfBusy(id))
|
||||||
val hazard = hits.orR || !rf.init.done
|
val hazard = hits.orR || !rf.init.done || commitLogic(portId).pending.full
|
||||||
val output = input.haltWhen(hazard)
|
val output = input.haltWhen(hazard)
|
||||||
when(input.valid && rf.init.done){
|
when(input.valid && rf.init.done){
|
||||||
scoreboard.targetWrite.address := input.rd
|
scoreboard.targetWrite.address := input.rd
|
||||||
|
@ -273,6 +260,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
}
|
}
|
||||||
when(output.fire && useRd){
|
when(output.fire && useRd){
|
||||||
scoreboard.targetWrite.valid := True
|
scoreboard.targetWrite.valid := True
|
||||||
|
commitLogic(portId).pending.inc := True
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -287,7 +275,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
}
|
}
|
||||||
|
|
||||||
val read = new Area{
|
val read = new Area{
|
||||||
val s0 = cmdArbiter.output.pipelined(m2s = true, s2m = true) //TODO may need to remove m2s for store latency
|
val s0 = cmdArbiter.output.pipelined() //TODO may need to remove m2s for store latency
|
||||||
val s1 = s0.m2sPipe()
|
val s1 = s0.m2sPipe()
|
||||||
val output = s1.swapPayload(RfReadOutput())
|
val output = s1.swapPayload(RfReadOutput())
|
||||||
val rs1Entry = rf.ram.readSync(s0.source @@ s0.rs1,enable = !output.isStall)
|
val rs1Entry = rf.ram.readSync(s0.source @@ s0.rs1,enable = !output.isStall)
|
||||||
|
@ -982,7 +970,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
|
|
||||||
when(exp(exp.getWidth-3, 3 bits) >= 5) { output.exponent(p.internalExponentSize-2, 2 bits) := 3 }
|
when(exp(exp.getWidth-3, 3 bits) >= 5) { output.exponent(p.internalExponentSize-2, 2 bits) := 3 }
|
||||||
|
|
||||||
// val flag = io.port(input.source).completion.flag
|
|
||||||
when(forceNan) {
|
when(forceNan) {
|
||||||
output.setNanQuiet
|
output.setNanQuiet
|
||||||
NV setWhen(infinitynan || input.rs1.isNanSignaling || input.rs2.isNanSignaling)
|
NV setWhen(infinitynan || input.rs1.isNanSignaling || input.rs2.isNanSignaling)
|
||||||
|
@ -1479,8 +1466,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
if (p.withDouble) output.format := input.format
|
if (p.withDouble) output.format := input.format
|
||||||
output.scrap := (mantissa(1) | mantissa(0) | roundingScrap)
|
output.scrap := (mantissa(1) | mantissa(0) | roundingScrap)
|
||||||
|
|
||||||
|
|
||||||
// val flag = io.port(input.source).completion.flag
|
|
||||||
output.NV := infinityNan || input.rs1.isNanSignaling || input.rs2.isNanSignaling
|
output.NV := infinityNan || input.rs1.isNanSignaling || input.rs2.isNanSignaling
|
||||||
output.DZ := False
|
output.DZ := False
|
||||||
when(forceNan) {
|
when(forceNan) {
|
||||||
|
|
|
@ -10,6 +10,7 @@ import vexriscv.ip.fpu._
|
||||||
import scala.collection.mutable.ArrayBuffer
|
import scala.collection.mutable.ArrayBuffer
|
||||||
|
|
||||||
class FpuPlugin(externalFpu : Boolean = false,
|
class FpuPlugin(externalFpu : Boolean = false,
|
||||||
|
simHalt : Boolean = false,
|
||||||
p : FpuParameter) extends Plugin[VexRiscv] with VexRiscvRegressionArg {
|
p : FpuParameter) extends Plugin[VexRiscv] with VexRiscvRegressionArg {
|
||||||
|
|
||||||
object FPU_ENABLE extends Stageable(Bool())
|
object FPU_ENABLE extends Stageable(Bool())
|
||||||
|
@ -222,10 +223,20 @@ class FpuPlugin(externalFpu : Boolean = false,
|
||||||
|
|
||||||
val internal = (!externalFpu).generate (pipeline plug new Area{
|
val internal = (!externalFpu).generate (pipeline plug new Area{
|
||||||
val fpu = FpuCore(1, p)
|
val fpu = FpuCore(1, p)
|
||||||
fpu.io.port(0).cmd << port.cmd
|
if(simHalt) {
|
||||||
fpu.io.port(0).commit << port.commit
|
val cmdHalt = in(Bool).setName("fpuCmdHalt").addAttribute(Verilator.public)
|
||||||
fpu.io.port(0).rsp >> port.rsp
|
val commitHalt = in(Bool).setName("fpuCommitHalt").addAttribute(Verilator.public)
|
||||||
fpu.io.port(0).completion <> port.completion
|
val rspHalt = in(Bool).setName("fpuRspHalt").addAttribute(Verilator.public)
|
||||||
|
fpu.io.port(0).cmd << port.cmd.haltWhen(cmdHalt)
|
||||||
|
fpu.io.port(0).commit << port.commit.haltWhen(commitHalt)
|
||||||
|
fpu.io.port(0).rsp.haltWhen(rspHalt) >> port.rsp
|
||||||
|
fpu.io.port(0).completion <> port.completion
|
||||||
|
} else {
|
||||||
|
fpu.io.port(0).cmd << port.cmd
|
||||||
|
fpu.io.port(0).commit << port.commit
|
||||||
|
fpu.io.port(0).rsp >> port.rsp
|
||||||
|
fpu.io.port(0).completion <> port.completion
|
||||||
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1848,6 +1848,11 @@ public:
|
||||||
instanceCycles += 1;
|
instanceCycles += 1;
|
||||||
|
|
||||||
for(SimElement* simElement : simElements) simElement->postCycle();
|
for(SimElement* simElement : simElements) simElement->postCycle();
|
||||||
|
#ifdef RVF
|
||||||
|
top->fpuCmdHalt = VL_RANDOM_I(1);
|
||||||
|
top->fpuCommitHalt = VL_RANDOM_I(1);
|
||||||
|
top->fpuRspHalt = VL_RANDOM_I(1);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -3815,10 +3820,10 @@ string riscvTestMemory[] = {
|
||||||
|
|
||||||
|
|
||||||
string riscvTestFloat[] = {
|
string riscvTestFloat[] = {
|
||||||
|
"rv32uf-p-fmadd",
|
||||||
"rv32uf-p-fadd",
|
"rv32uf-p-fadd",
|
||||||
"rv32uf-p-fcmp",
|
"rv32uf-p-fcmp",
|
||||||
"rv32uf-p-fcvt_w",
|
"rv32uf-p-fcvt_w",
|
||||||
"rv32uf-p-fmadd",
|
|
||||||
"rv32uf-p-ldst",
|
"rv32uf-p-ldst",
|
||||||
"rv32uf-p-recoding",
|
"rv32uf-p-recoding",
|
||||||
"rv32uf-p-fclass",
|
"rv32uf-p-fclass",
|
||||||
|
@ -3830,9 +3835,9 @@ string riscvTestFloat[] = {
|
||||||
|
|
||||||
|
|
||||||
string riscvTestDouble[] = {
|
string riscvTestDouble[] = {
|
||||||
|
"rv32ud-p-fmadd",
|
||||||
"rv32ud-p-fadd",
|
"rv32ud-p-fadd",
|
||||||
"rv32ud-p-fcvt",
|
"rv32ud-p-fcvt",
|
||||||
"rv32ud-p-fmadd",
|
|
||||||
"rv32ud-p-recoding",
|
"rv32ud-p-recoding",
|
||||||
"rv32ud-p-fclass",
|
"rv32ud-p-fclass",
|
||||||
"rv32ud-p-fcvt_w",
|
"rv32ud-p-fcvt_w",
|
||||||
|
|
Loading…
Reference in New Issue