From b1f4c06d4ec10fddfa3d877b58112b3002eba413 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Mon, 22 Feb 2021 19:27:26 +0100 Subject: [PATCH] fpu fix arbitration/lock bugs add getVexRiscvRegressionArgs --- src/main/scala/vexriscv/Services.scala | 2 +- src/main/scala/vexriscv/TestsWorkspace.scala | 193 ++++++++++-------- src/main/scala/vexriscv/VexRiscv.scala | 13 +- .../demo/smp/VexRiscvSmpCluster.scala | 2 +- src/main/scala/vexriscv/ip/fpu/FpuCore.scala | 18 +- .../scala/vexriscv/ip/fpu/Interface.scala | 9 +- .../scala/vexriscv/plugin/BranchPlugin.scala | 1 - .../scala/vexriscv/plugin/CsrPlugin.scala | 9 +- .../vexriscv/plugin/DBusCachedPlugin.scala | 14 +- .../scala/vexriscv/plugin/FpuPlugin.scala | 28 +-- .../vexriscv/plugin/IBusCachedPlugin.scala | 13 +- .../plugin/MulDivIterativePlugin.scala | 9 +- .../scala/vexriscv/plugin/MulPlugin.scala | 7 +- src/test/cpp/regression/main.cpp | 103 ++++++---- src/test/cpp/regression/makefile | 9 + src/test/scala/vexriscv/ip/fpu/FpuTest.scala | 15 +- 16 files changed, 293 insertions(+), 152 deletions(-) diff --git a/src/main/scala/vexriscv/Services.scala b/src/main/scala/vexriscv/Services.scala index d7453d1..79a3059 100644 --- a/src/main/scala/vexriscv/Services.scala +++ b/src/main/scala/vexriscv/Services.scala @@ -8,7 +8,7 @@ import spinal.lib._ import scala.beans.BeanProperty trait JumpService{ - def createJumpInterface(stage : Stage, priority : Int = 0) : Flow[UInt] + def createJumpInterface(stage : Stage, priority : Int = 0) : Flow[UInt] //High priority win } trait IBusFetcher{ diff --git a/src/main/scala/vexriscv/TestsWorkspace.scala b/src/main/scala/vexriscv/TestsWorkspace.scala index bcda45f..f4347f4 100644 --- a/src/main/scala/vexriscv/TestsWorkspace.scala +++ b/src/main/scala/vexriscv/TestsWorkspace.scala @@ -25,95 +25,128 @@ import spinal.lib._ import vexriscv.ip._ import spinal.lib.bus.avalon.AvalonMM import spinal.lib.eda.altera.{InterruptReceiverTag, ResetEmitterTag} +import vexriscv.demo.smp.VexRiscvSmpClusterGen import vexriscv.ip.fpu.FpuParameter // make clean all SEED=42 MMU=no STOP_ON_ERROR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes SUPERVISOR=yes REDO=1 DHRYSTONE=yes LRSC=yes AMO=yes LINUX_REGRESSION=yes TRACE=yes TRACE_START=1000000000 FLOW_INFO=ye IBUS_DATA_WIDTH=128 DBUS_DATA_WIDTH=128 -//make clean all SEED=42 MMU=no STOP_ON_ERROR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes SUPERVISOR=yes REDO=1 DHRYSTONE=yes LRSC=yes AMO=yes TRACE=yes TRACE_START=1000000000 FLOW_INFO=ye IBUS_DATA_WIDTH=128 DBUS_DATA_WIDTH=128 LINUX_SOC_SMP=yes VMLINUX=../../../../../buildroot/output/images/Image RAMDISK=../../../../../buildroot/output/images/rootfs.cpio DTB=../../../../../buildroot/output/images/dtb EMULATOR=../../../../../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_jump.bin +// make clean all IBUS=CACHED IBUS_DATA_WIDTH=64 COMPRESSED=no DBUS=CACHED DBUS_LOAD_DATA_WIDTH=64 DBUS_STORE_DATA_WIDTH=64 LRSC=yes AMO=yes SUPERVISOR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes MUL=yes DIV=yes RVF=yes RVD=yes DEBUG_PLUGIN=no LINUX_SOC_SMP=yes EMULATOR=$IMAGES/fw_jump.bin VMLINUX=$IMAGES/Image DTB=$IMAGES/linux.dtb RAMDISK=$IMAGES/rootfs.cpio TRACE=ye REDO=1 DEBUG=ye WITH_USER_IO=yes SEED=42 object TestsWorkspace { def main(args: Array[String]) { SpinalConfig().generateVerilog { + // make clean all REDO=10 CSR=no MMU=no COREMARK=no RVF=yes RVD=yes REDO=1 DBUS_LOAD_DATA_WIDTH=64 DBUS_STORE_DATA_WIDTH=64 DEBUG=ye TRACE=ye - val config = VexRiscvConfig( - plugins = List( - new IBusCachedPlugin( - prediction = DYNAMIC, - config = InstructionCacheConfig( - cacheSize = 4096, - bytePerLine =32, - wayCount = 1, - addressWidth = 32, - cpuDataWidth = 32, - memDataWidth = 32, - catchIllegalAccess = true, - catchAccessFault = true, - asyncTagMemory = false, - twoCycleRam = true, - twoCycleCache = true - ), - memoryTranslatorPortConfig = MmuPortConfig( - portTlbSize = 4 - ) - ), - new DBusCachedPlugin( - config = new DataCacheConfig( - cacheSize = 4096, - bytePerLine = 32, - wayCount = 1, - addressWidth = 32, - cpuDataWidth = 64, - memDataWidth = 64, - catchAccessError = true, - catchIllegal = true, - catchUnaligned = true - ), - memoryTranslatorPortConfig = MmuPortConfig( - portTlbSize = 6 - ) - ), - new MmuPlugin( - virtualRange = _(31 downto 28) === 0xC, - ioRange = _(31 downto 28) === 0xF - ), - new DecoderSimplePlugin( - catchIllegalInstruction = true - ), - new RegFilePlugin( - regFileReadyKind = plugin.SYNC, - zeroBoot = false - ), - new IntAluPlugin, - new SrcPlugin( - separatedAddSub = false, - executeInsertion = true - ), - new FullBarrelShifterPlugin, - new HazardSimplePlugin( - bypassExecute = true, - bypassMemory = true, - bypassWriteBack = true, - bypassWriteBackBuffer = true, - pessimisticUseSrc = false, - pessimisticWriteRegFile = false, - pessimisticAddressMatch = false - ), - new MulPlugin, - new DivPlugin, - new CsrPlugin(CsrPluginConfig.small(0x80000020l)), - new DebugPlugin(ClockDomain.current.clone(reset = Bool().setName("debugReset"))), - new BranchPlugin( - earlyBranch = false, - catchAddressMisaligned = true - ), - new YamlPlugin("cpu0.yaml") - ) - ) - config.plugins += new FpuPlugin( - externalFpu = false, - p = FpuParameter( - withDouble = true - ) +// val config = VexRiscvConfig( +// plugins = List( +// new IBusCachedPlugin( +// prediction = DYNAMIC, +// config = InstructionCacheConfig( +// cacheSize = 4096, +// bytePerLine =32, +// wayCount = 1, +// addressWidth = 32, +// cpuDataWidth = 32, +// memDataWidth = 32, +// catchIllegalAccess = true, +// catchAccessFault = true, +// asyncTagMemory = false, +// twoCycleRam = true, +// twoCycleCache = true +// ), +// memoryTranslatorPortConfig = MmuPortConfig( +// portTlbSize = 4 +// ) +// ), +// new DBusCachedPlugin( +// config = new DataCacheConfig( +// cacheSize = 4096, +// bytePerLine = 32, +// wayCount = 1, +// addressWidth = 32, +// cpuDataWidth = 64, +// memDataWidth = 64, +// catchAccessError = true, +// catchIllegal = true, +// catchUnaligned = true +// ), +// memoryTranslatorPortConfig = MmuPortConfig( +// portTlbSize = 6 +// ) +// ), +// new MmuPlugin( +// virtualRange = _(31 downto 28) === 0xC, +// ioRange = _(31 downto 28) === 0xF +// ), +// new DecoderSimplePlugin( +// catchIllegalInstruction = true +// ), +// new RegFilePlugin( +// regFileReadyKind = plugin.SYNC, +// zeroBoot = false +// ), +// new IntAluPlugin, +// new SrcPlugin( +// separatedAddSub = false, +// executeInsertion = true +// ), +// new FullBarrelShifterPlugin, +// new HazardSimplePlugin( +// bypassExecute = true, +// bypassMemory = true, +// bypassWriteBack = true, +// bypassWriteBackBuffer = true, +// pessimisticUseSrc = false, +// pessimisticWriteRegFile = false, +// pessimisticAddressMatch = false +// ), +// new MulPlugin, +// new DivPlugin, +// new CsrPlugin(CsrPluginConfig.small(0x80000020l)), +// new DebugPlugin(ClockDomain.current.clone(reset = Bool().setName("debugReset"))), +// new BranchPlugin( +// earlyBranch = false, +// catchAddressMisaligned = true +// ), +// new YamlPlugin("cpu0.yaml") +// ) +// ) +// config.plugins += new FpuPlugin( +// externalFpu = false, +// p = FpuParameter( +// withDouble = true +// ) +// ) + +// mkdir buildroot-build +// cd buildroot-build/ +// make O=$PWD BR2_EXTERNAL=../buildroot-spinal-saxon -C ../buildroot saxon_regression_defconfig + + // export IMAGES=/media/data/open/SaxonSoc/artyA7SmpUpdate/buildroot-regression/buildroot-build/images + // make clean all IBUS=CACHED IBUS_DATA_WIDTH=64 COMPRESSED=no DBUS=CACHED DBUS_LOAD_DATA_WIDTH=64 DBUS_STORE_DATA_WIDTH=64 LRSC=yes AMO=yes SUPERVISOR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes MUL=yes DIV=yes RVF=yes RVD=yes DEBUG_PLUGIN=no LINUX_SOC_SMP=yes EMULATOR=$IMAGES/fw_jump.bin VMLINUX=$IMAGES/Image DTB=$IMAGES/linux.dtb RAMDISK=$IMAGES/rootfs.cpio TRACE=ye REDO=1 DEBUG=ye WITH_USER_IO=no + // make clean all IBUS=CACHED IBUS_DATA_WIDTH=64 COMPRESSED=no DBUS=CACHED DBUS_LOAD_DATA_WIDTH=64 DBUS_STORE_DATA_WIDTH=64 LRSC=yes AMO=yes SUPERVISOR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes MUL=yes DIV=yes RVF=yes RVD=yes DEBUG_PLUGIN=no LINUX_SOC_SMP=yes EMULATOR=$IMAGES/fw_jump.bin VMLINUX=$IMAGES/Image DTB=$IMAGES/linux.dtb RAMDISK=$IMAGES/rootfs.cpio TRACE=yes REDO=1 DEBUG=ye WITH_USER_IO=no FLOW_INFO=yes TRACE_START=47000000000ll SEED=43 + // make clean all IBUS=CACHED IBUS_DATA_WIDTH=64 COMPRESSED=no DBUS=CACHED DBUS_LOAD_DATA_WIDTH=64 DBUS_STORE_DATA_WIDTH=64 LRSC=yes AMO=yes SUPERVISOR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes MUL=yes DIV=yes RVF=yes RVD=yes DEBUG_PLUGIN=no LINUX_SOC_SMP=yes EMULATOR=$IMAGES/fw_jump.bin VMLINUX=$IMAGES/Image DTB=$IMAGES/linux.dtb RAMDISK=$IMAGES/rootfs.cpio TRACE=yes REDO=1 DEBUG=ye WITH_USER_IO=no FLOW_INFO=yes TRACE_START=47000000000ll SEED=45 + //make clean all IBUS=CACHED IBUS_DATA_WIDTH=64 COMPRESSED=no DBUS=CACHED DBUS_LOAD_DATA_WIDTH=64 DBUS_STORE_DATA_WIDTH=64 LRSC=yes AMO=yes SUPERVISOR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes MUL=yes DIV=yes RVF=yes RVD=yes DEBUG_PLUGIN=no LINUX_SOC_SMP=yes EMULATOR=$IMAGES/fw_jump.bin VMLINUX=$IMAGES/Image DTB=$IMAGES/linux.dtb RAMDISK=$IMAGES/rootfs.cpio TRACE=yes REDO=1 DEBUG=ye WITH_USER_IO=no FLOW_INFO=yes TRACE_START=565000000ll SEED=45 + val config = VexRiscvSmpClusterGen.vexRiscvConfig( + hartId = 0, + ioRange = _ (31 downto 28) === 0xF, + resetVector = 0x80000000l, + iBusWidth = 64, + dBusWidth = 64, + loadStoreWidth = 64, + iCacheSize = 4096*2, + dCacheSize = 4096*2, + iCacheWays = 2, + dCacheWays = 2, + withFloat = true, + withDouble = true, + externalFpu = false ) + + + println("Args :") + println(config.getRegressionArgs().mkString(" ")) + + val toplevel = new VexRiscv(config) // val toplevel = new VexRiscv(configLight) // val toplevel = new VexRiscv(configTest) diff --git a/src/main/scala/vexriscv/VexRiscv.scala b/src/main/scala/vexriscv/VexRiscv.scala index 5f7865c..77ce1c1 100644 --- a/src/main/scala/vexriscv/VexRiscv.scala +++ b/src/main/scala/vexriscv/VexRiscv.scala @@ -16,7 +16,9 @@ object VexRiscvConfig{ def apply(plugins : Seq[Plugin[VexRiscv]] = ArrayBuffer()) : VexRiscvConfig = apply(true,true,plugins) } - +trait VexRiscvRegressionArg{ + def getVexRiscvRegressionArgs() : Seq[String] +} case class VexRiscvConfig(){ var withMemoryStage = true var withWriteBackStage = true @@ -83,6 +85,15 @@ case class VexRiscvConfig(){ } object SRC1_CTRL extends Stageable(Src1CtrlEnum()) object SRC2_CTRL extends Stageable(Src2CtrlEnum()) + + def getRegressionArgs() : Seq[String] = { + val str = ArrayBuffer[String]() + plugins.foreach{ + case e : VexRiscvRegressionArg => str ++= e.getVexRiscvRegressionArgs() + case _ => + } + str + } } diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala index 5b66670..0e3018b 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -279,7 +279,7 @@ object VexRiscvSmpClusterGen { ) if(withFloat) config.plugins += new FpuPlugin( - externalFpu = true, + externalFpu = externalFpu, p = FpuParameter(withDouble = withDouble) ) config diff --git a/src/main/scala/vexriscv/ip/fpu/FpuCore.scala b/src/main/scala/vexriscv/ip/fpu/FpuCore.scala index a241099..21b2ad7 100644 --- a/src/main/scala/vexriscv/ip/fpu/FpuCore.scala +++ b/src/main/scala/vexriscv/ip/fpu/FpuCore.scala @@ -184,7 +184,8 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ val commitLogic = for(source <- 0 until portCount) yield new Area{ val fire = False - val target, hit = Reg(UInt(log2Up(rfLockCount) bits)) init(0) + val target, hit = Reg(UInt(log2Up(rfLockCount+1) bits)) init(0) + val full = target + 1 === hit when(fire){ hit := hit + 1 } @@ -192,7 +193,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ commitFork.commit(source).ready := False when(commitFork.commit(source).valid) { for (lock <- rf.lock) { - when(lock.valid && lock.source === source && lock.id === hit) { + when(lock.valid && lock.source === source && lock.id === hit && !lock.commited) { fire := True lock.commited := True lock.write := commitFork.commit(source).write @@ -233,7 +234,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ } val hits = List((useRs1, s0.rs1), (useRs2, s0.rs2), (useRs3, s0.rs3), (useRd, s0.rd)).map{case (use, reg) => use && rf.lock.map(l => l.valid && l.source === s0.source && l.address === reg).orR} - val hazard = hits.orR + val hazard = hits.orR || commitLogic.map(_.full).read(s0.source) when(s0.fire && useRd){ for(i <- 0 until portCount){ when(s0.source === i){ @@ -938,7 +939,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ output.roundMode := input.roundMode output.scrap := norm.scrap output.value := norm.output - output.NV := NV + output.NV := NV //TODO isn't propagated in FMA output.DZ := False decode.mulToAdd.valid := input.valid && input.add @@ -946,7 +947,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ decode.mulToAdd.rs1.mantissa := norm.output.mantissa >> 1 //FMA Precision lost decode.mulToAdd.rs1.exponent := norm.output.exponent decode.mulToAdd.rs1.sign := norm.output.sign - decode.mulToAdd.rs1.special := False //TODO + decode.mulToAdd.rs1.special := norm.output.special decode.mulToAdd.rs2 := input.rs3 decode.mulToAdd.rd := input.rd decode.mulToAdd.lockId := input.lockId @@ -1289,6 +1290,8 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ if(p.withMul) (inputs += mul.result.output) if(p.withShortPipMisc) (inputs += shortPip.rfOutput.pipelined(m2s = true)) val arbitrated = StreamArbiterFactory.lowerFirst.noLock.on(inputs) + val isCommited = rf.lock.map(_.commited).read(arbitrated.lockId) + val commited = arbitrated.haltWhen(!isCommited).toFlow } class RoundFront extends MergeInput{ @@ -1298,7 +1301,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ } val roundFront = new Area { - val input = merge.arbitrated.stage() + val input = merge.commited.stage() val output = input.swapPayload(new RoundFront()) output.payload.assignSomeByName(input.payload) @@ -1328,8 +1331,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ val roundBack = new Area{ val input = roundFront.output.stage() - val isCommited = rf.lock.map(_.commited).read(input.lockId) - val output = input.haltWhen(!isCommited).toFlow.swapPayload(RoundOutput()) + val output = input.swapPayload(RoundOutput()) import input.payload._ val math = p.internalFloating() diff --git a/src/main/scala/vexriscv/ip/fpu/Interface.scala b/src/main/scala/vexriscv/ip/fpu/Interface.scala index 9e02161..dd0d2f0 100644 --- a/src/main/scala/vexriscv/ip/fpu/Interface.scala +++ b/src/main/scala/vexriscv/ip/fpu/Interface.scala @@ -92,8 +92,15 @@ object FpuFormat extends SpinalEnum{ val FLOAT, DOUBLE = newElement() } -object FpuRoundMode extends SpinalEnum(defaultEncoding = binarySequential){ +object FpuRoundMode extends SpinalEnum(){ val RNE, RTZ, RDN, RUP, RMM = newElement() + defaultEncoding = SpinalEnumEncoding("opt")( + RNE -> 0, + RTZ -> 1, + RDN -> 2, + RUP -> 3, + RMM -> 4 + ) } object FpuRoundModeInstr extends SpinalEnum(){ val RNE, RTZ, RDN, RUP, RMM, DYN = newElement() diff --git a/src/main/scala/vexriscv/plugin/BranchPlugin.scala b/src/main/scala/vexriscv/plugin/BranchPlugin.scala index 9c36cf7..53ef62d 100644 --- a/src/main/scala/vexriscv/plugin/BranchPlugin.scala +++ b/src/main/scala/vexriscv/plugin/BranchPlugin.scala @@ -66,7 +66,6 @@ class BranchPlugin(earlyBranch : Boolean, object IS_FENCEI extends Stageable(Bool) var jumpInterface : Flow[UInt] = null - var predictionJumpInterface : Flow[UInt] = null var predictionExceptionPort : Flow[ExceptionCause] = null var branchExceptionPort : Flow[ExceptionCause] = null diff --git a/src/main/scala/vexriscv/plugin/CsrPlugin.scala b/src/main/scala/vexriscv/plugin/CsrPlugin.scala index 407d890..456d688 100644 --- a/src/main/scala/vexriscv/plugin/CsrPlugin.scala +++ b/src/main/scala/vexriscv/plugin/CsrPlugin.scala @@ -544,7 +544,7 @@ class CsrPlugin(val config: CsrPluginConfig) extends Plugin[VexRiscv] with Excep if(supervisorGen) { - redoInterface = pcManagerService.createJumpInterface(pipeline.execute, -1) + redoInterface = pcManagerService.createJumpInterface(pipeline.execute, 10) } exceptionPendings = Vec(Bool, pipeline.stages.length) @@ -749,12 +749,13 @@ class CsrPlugin(val config: CsrPluginConfig) extends Plugin[VexRiscv] with Excep satpAccess(CSR.SATP, 31 -> satp.MODE, 22 -> satp.ASID, 0 -> satp.PPN) - if(supervisorGen) { + val satpLogic = supervisorGen generate new Area { redoInterface.valid := False redoInterface.payload := decode.input(PC) - duringWrite(CSR.SATP){ - execute.arbitration.flushNext := True + duringWrite(CSR.SATP) { redoInterface.valid := True + execute.arbitration.flushNext := True + decode.arbitration.haltByOther := True } } } diff --git a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala index 9c939bf..32cfdb9 100644 --- a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala +++ b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala @@ -33,7 +33,7 @@ class DBusCachedPlugin(val config : DataCacheConfig, dBusCmdSlavePipe : Boolean = false, dBusRspSlavePipe : Boolean = false, relaxedMemoryTranslationRegister : Boolean = false, - csrInfo : Boolean = false) extends Plugin[VexRiscv] with DBusAccessService with DBusEncodingService { + csrInfo : Boolean = false) extends Plugin[VexRiscv] with DBusAccessService with DBusEncodingService with VexRiscvRegressionArg { import config._ assert(!(config.withExternalAmo && !dBusRspSlavePipe)) assert(isPow2(cacheSize)) @@ -52,6 +52,18 @@ class DBusCachedPlugin(val config : DataCacheConfig, dBusAccess } + override def getVexRiscvRegressionArgs(): Seq[String] = { + var args = List[String]() + args :+= "DBUS=CACHED" + args :+= s"DBUS_LOAD_DATA_WIDTH=$memDataWidth" + args :+= s"DBUS_STORE_DATA_WIDTH=$cpuDataWidth" + if(withLrSc) args :+= "LRSC=yes" + if(withAmo) args :+= "AMO=yes" + if(config.withExclusive && config.withInvalidate) args ++= List("DBUS_EXCLUSIVE=yes", "DBUS_INVALIDATE=yes") + args + } + + override def addLoadWordEncoding(key : MaskedLiteral): Unit = { val decoderService = pipeline.service(classOf[DecoderService]) val cfg = pipeline.config diff --git a/src/main/scala/vexriscv/plugin/FpuPlugin.scala b/src/main/scala/vexriscv/plugin/FpuPlugin.scala index c8af1d9..1d657b4 100644 --- a/src/main/scala/vexriscv/plugin/FpuPlugin.scala +++ b/src/main/scala/vexriscv/plugin/FpuPlugin.scala @@ -10,7 +10,7 @@ import vexriscv.ip.fpu._ import scala.collection.mutable.ArrayBuffer class FpuPlugin(externalFpu : Boolean = false, - p : FpuParameter) extends Plugin[VexRiscv]{ + p : FpuParameter) extends Plugin[VexRiscv] with VexRiscvRegressionArg { object FPU_ENABLE extends Stageable(Bool()) object FPU_COMMIT extends Stageable(Bool()) @@ -24,6 +24,13 @@ class FpuPlugin(externalFpu : Boolean = false, var port : FpuPort = null + override def getVexRiscvRegressionArgs(): Seq[String] = { + var args = List[String]() + args :+= "RVF=yes" + if(p.withDouble) args :+= "RVD=yes" + args + } + override def setup(pipeline: VexRiscv): Unit = { import pipeline.config._ @@ -154,7 +161,7 @@ class FpuPlugin(externalFpu : Boolean = false, dBusEncoding.addStoreWordEncoding(FSD) } - exposeEncoding() +// exposeEncoding() } def exposeEncoding(): Unit ={ @@ -171,15 +178,6 @@ class FpuPlugin(externalFpu : Boolean = false, if(s == pipeline.config.RS1_USE) (if(isSet)rs1 += key else rs1N += key) } -// println("COMMIT => ") -// filter(0x53, commits).foreach(println) -// println("COMMITN => ") -// filter(0x53, commitsN).foreach(println) -// println("RSP => ") -// filter(0x53, rsps).foreach(println) -// println("RSPN => ") -// filter(0x53, rspsN).foreach(println) - val commitLut, rspLut, rs1Lut = Array.fill(32)(false) filter(0x53,commits).foreach{m => val idx = (m.value >> 27).toInt @@ -256,11 +254,17 @@ class FpuPlugin(externalFpu : Boolean = false, execute.arbitration.haltByOther setWhen(csrActive && hasPending) // pessimistic val fs = Reg(Bits(2 bits)) init(1) - when(hasPending){ + val sd = fs === 3 + + when(stages.last.arbitration.isFiring && stages.last.input(FPU_ENABLE)){ fs := 3 //DIRTY } + service.rw(CSR.SSTATUS, 13, fs) service.rw(CSR.MSTATUS, 13, fs) + + service.r(CSR.SSTATUS, 31, sd) + service.r(CSR.MSTATUS, 31, sd) } decode plug new Area{ diff --git a/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala b/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala index e23cec7..9de1382 100644 --- a/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala +++ b/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala @@ -49,15 +49,26 @@ class IBusCachedPlugin(resetVector : BigInt = 0x80000000l, injectorStage = (!config.twoCycleCache && !withoutInjectorStage) || injectorStage, relaxPredictorAddress = relaxPredictorAddress, fetchRedoGen = true, - predictionBuffer = predictionBuffer){ + predictionBuffer = predictionBuffer) with VexRiscvRegressionArg{ import config._ + + assert(isPow2(cacheSize)) assert(!(memoryTranslatorPortConfig != null && config.cacheSize/config.wayCount > 4096), "When the I$ is used with MMU, each way can't be bigger than a page (4096 bytes)") assert(!(withoutInjectorStage && injectorStage)) + + override def getVexRiscvRegressionArgs(): Seq[String] = { + var args = List[String]() + args :+= "IBUS=CACHED" + args :+= s"IBUS_DATA_WIDTH=$memDataWidth" + args :+= s"COMPRESSED=${if(compressedGen) "yes" else "no"}" + args + } + var iBus : InstructionCacheMemBus = null var mmuBus : MemoryTranslatorBus = null var privilegeService : PrivilegeService = null diff --git a/src/main/scala/vexriscv/plugin/MulDivIterativePlugin.scala b/src/main/scala/vexriscv/plugin/MulDivIterativePlugin.scala index da3738d..fff12ef 100644 --- a/src/main/scala/vexriscv/plugin/MulDivIterativePlugin.scala +++ b/src/main/scala/vexriscv/plugin/MulDivIterativePlugin.scala @@ -19,9 +19,16 @@ class MulDivIterativePlugin(genMul : Boolean = true, mulUnrollFactor : Int = 1, divUnrollFactor : Int = 1, dhrystoneOpt : Boolean = false, - customMul : (UInt, UInt, Stage, VexRiscv) => Area = null) extends Plugin[VexRiscv]{ + customMul : (UInt, UInt, Stage, VexRiscv) => Area = null) extends Plugin[VexRiscv] with VexRiscvRegressionArg { import MulDivIterativePlugin._ + override def getVexRiscvRegressionArgs(): Seq[String] = { + var args = List[String]() + if(genMul) args :+= "MUL=yes" + if(genDiv) args :+= "DIV=yes" + args + } + override def setup(pipeline: VexRiscv): Unit = { import Riscv._ import pipeline.config._ diff --git a/src/main/scala/vexriscv/plugin/MulPlugin.scala b/src/main/scala/vexriscv/plugin/MulPlugin.scala index 31714e8..2a13900 100644 --- a/src/main/scala/vexriscv/plugin/MulPlugin.scala +++ b/src/main/scala/vexriscv/plugin/MulPlugin.scala @@ -5,7 +5,7 @@ import spinal.core._ import spinal.lib.KeepAttribute //Input buffer generaly avoid the FPGA synthesis to duplicate reg inside the DSP cell, which could stress timings quite much. -class MulPlugin(inputBuffer : Boolean = false) extends Plugin[VexRiscv]{ +class MulPlugin(inputBuffer : Boolean = false) extends Plugin[VexRiscv] with VexRiscvRegressionArg { object MUL_LL extends Stageable(UInt(32 bits)) object MUL_LH extends Stageable(SInt(34 bits)) object MUL_HL extends Stageable(SInt(34 bits)) @@ -15,6 +15,11 @@ class MulPlugin(inputBuffer : Boolean = false) extends Plugin[VexRiscv]{ object IS_MUL extends Stageable(Bool) + override def getVexRiscvRegressionArgs(): Seq[String] = { + List("MUL=yes") + } + + override def setup(pipeline: VexRiscv): Unit = { import Riscv._ import pipeline.config._ diff --git a/src/test/cpp/regression/main.cpp b/src/test/cpp/regression/main.cpp index c6330a7..ac9417b 100644 --- a/src/test/cpp/regression/main.cpp +++ b/src/test/cpp/regression/main.cpp @@ -227,7 +227,8 @@ class success : public std::exception { }; #define SIP 0x144 #define SATP 0x180 - +#define UTIME 0xC01 // rdtime +#define UTIMEH 0xC81 #define SSTATUS_SIE 0x00000002 #define SSTATUS_SPIE 0x00000020 @@ -426,6 +427,7 @@ public: bool lrscReserved; uint32_t lrscReservedAddress; u32 fpuCompletionTockens; + u32 dutRfWriteValue; RiscvGolden() { pc = 0x80000000; @@ -444,6 +446,10 @@ public: status.spp = 1; #ifdef RVF status.fs = 1; + misa |= 1 << 5; + #endif + #ifdef RVD + misa |= 1 << 3; #endif fcsr.flags = 0; fcsr.frm = 0; @@ -515,10 +521,10 @@ public: } void trap(bool interrupt,int32_t cause, bool valueWrite, uint32_t value) { #ifdef FLOW_INFO - cout << "TRAP " << (interrupt ? "interrupt" : "exception") << " cause=" << cause << " PC=0x" << hex << pc << " val=0x" << hex << value << dec << endl; - if(cause == 9){ - cout << hex << " a7=0x" << regs[17] << " a0=0x" << regs[10] << " a1=0x" << regs[11] << " a2=0x" << regs[12] << dec << endl; - } +// cout << "TRAP " << (interrupt ? "interrupt" : "exception") << " cause=" << cause << " PC=0x" << hex << pc << " val=0x" << hex << value << dec << endl; +// if(cause == 9){ +// cout << hex << " a7=0x" << regs[17] << " a0=0x" << regs[10] << " a1=0x" << regs[11] << " a2=0x" << regs[12] << dec << endl; +// } #endif //Check leguality of the interrupt if(interrupt) { @@ -584,7 +590,7 @@ public: virtual bool csrRead(int32_t csr, uint32_t *value){ if(((csr >> 8) & 0x3) > privilege) return true; switch(csr){ - case MSTATUS: *value = status.raw & MSTATUS_READ_MASK; break; + case MSTATUS: *value = (status.raw | (((status.raw & 0x6000) == 0x6000) ? 0x80000000 : 0)) & MSTATUS_READ_MASK; break; case MIP: *value = getIp().raw; break; case MIE: *value = ie.raw; break; case MTVEC: *value = mtvec.raw; break; @@ -597,7 +603,7 @@ public: case MIDELEG: *value = mideleg; break; case MHARTID: *value = 0; break; - case SSTATUS: *value = status.raw & 0xC0133; break; + case SSTATUS: *value = (status.raw | (((status.raw & 0x6000) == 0x6000) ? 0x80000000 : 0)) & (0x800C0133 | STATUS_FS_MASK); break; case SIP: *value = getIp().raw & 0x333; break; case SIE: *value = ie.raw & 0x333; break; case STVEC: *value = stvec.raw; break; @@ -613,6 +619,11 @@ public: case FFLAGS: *value = fcsr.flags; break; #endif + #ifdef UTIME_INPUT + case UTIME: *value = dutRfWriteValue; break; + case UTIMEH: *value = dutRfWriteValue; break; + #endif + default: return true; break; } return false; @@ -627,12 +638,12 @@ public: return value; } - #define maskedWrite(dst, src, mask) dst=(dst & ~mask)|(src & mask); + #define maskedWrite(dst, src, mask) dst=((dst) & ~(mask))|((src) & (mask)); virtual bool csrWrite(int32_t csr, uint32_t value){ if(((csr >> 8) & 0x3) > privilege) return true; switch(csr){ - case MSTATUS: status.raw = value; break; + case MSTATUS: status.raw = value & 0x7FFFFFFF; break; case MIP: ipSoft = value; break; case MIE: ie.raw = value; break; case MTVEC: mtvec.raw = value; break; @@ -644,7 +655,7 @@ public: case MEDELEG: medeleg = value & (~0x8); break; case MIDELEG: mideleg = value; break; - case SSTATUS: maskedWrite(status.raw, value,0xC0133 | STATUS_FS_MASK); break; + case SSTATUS: maskedWrite(status.raw, value, 0xC0133 | STATUS_FS_MASK); break; case SIP: maskedWrite(ipSoft, value,0x333); break; case SIE: maskedWrite(ie.raw, value,0x333); break; case STVEC: stvec.raw = value; break; @@ -652,8 +663,7 @@ public: case STVAL: sbadaddr = value; break; case SEPC: sepc = value; break; case SSCRATCH: sscratch = value; break; - case SATP: satp.raw = value; break; - + case SATP: satp.raw = value; break; #ifdef RVF case FCSR: fcsr.raw = value & 0x7F; break; @@ -739,6 +749,7 @@ public: fpuCompletionTockens -= 1; } + #define rd32 ((i >> 7) & 0x1F) #define iBits(lo, len) ((i >> lo) & ((1 << len)-1)) #define iBitsSigned(lo, len) int32_t(i) << (32-lo-len) >> (32-len) @@ -833,6 +844,7 @@ public: fcsr.flags |= rsp.flags; rfWrite(rd32, (u32)rsp.value); } + status.fs = 3; pcWrite(pc + 4); } break; case 0x07: { //Fpu load @@ -860,6 +872,7 @@ public: cout << "FPU load missmatch DUT=" << hex << commit.value << " REF=" << data << dec << endl; fail(); } else { + status.fs = 3; pcWrite(pc + 4); } } @@ -882,6 +895,7 @@ public: } else { if(v2p(address, &pAddr, WRITE)){ trap(0, 15, address); return; } dWrite(pAddr, size, (uint8_t*) &rsp.value); + status.fs = 3; pcWrite(pc + 4); } } break; @@ -1687,10 +1701,15 @@ public: //if(mTime == mTimeCmp) printf("SIM timer tick\n"); #endif + + #ifdef UTIME_INPUT + top->utime = mTime; + #endif + currentTime = i; #ifdef FLOW_INFO - if(i % 2000000 == 0) cout << "**" << endl << "**" << endl << "**" << endl << "**" << endl << "**" << endl << "PROGRESS TRACE_START=" << i << endl; + if(i % 5000000 == 0) cout << endl << "**" << endl << "**" << endl << "PROGRESS TRACE_START=" << i << endl; #endif @@ -1764,6 +1783,7 @@ public: // cout << "- S " << privilegeCounters[1] << endl; // cout << "- M " << privilegeCounters[3] << endl; // } + riscvRef.dutRfWriteValue = top->VexRiscv->lastStageRegFileWrite_payload_data; riscvRef.step(); bool mIntTimer = false; bool mIntExt = false; @@ -2554,10 +2574,16 @@ public: virtual void onReset(){ top->dBus_cmd_ready = 1; top->dBus_rsp_valid = 0; + #ifdef DBUS_AGGREGATION + top->dBus_rsp_payload_aggregated = 0; + #endif #ifdef DBUS_INVALIDATE top->dBus_inv_valid = 0; top->dBus_ack_ready = 0; top->dBus_sync_valid = 0; + #ifdef DBUS_AGGREGATION + top->dBus_sync_payload_aggregated = 0; + #endif #endif } @@ -3700,7 +3726,7 @@ public: - virtual void dBusAccess(uint32_t addr,bool wr, uint32_t size,uint64_t mask, uint8_t *dataBytes, bool *error) { + virtual void dBusAccess(uint32_t addr,bool wr, uint32_t size, uint8_t *dataBytes, bool *error) { uint32_t *data = (uint32_t*)dataBytes; if(isPerifRegion(addr)) switch(addr){ case 0xF0010000: if(wr && *data != 0) fail(); else *data = 0; break; @@ -3733,9 +3759,9 @@ public: } } break; - default: cout << "Unmapped peripheral access : addr=0x" << hex << addr << " wr=" << wr << " mask=0x" << mask << " data=0x" << data << dec << endl; fail(); break; + default: cout << "Unmapped peripheral access : addr=0x" << hex << addr << " wr=" << wr << " data=0x" << data << dec << endl; fail(); break; } - Workspace::dBusAccess(addr,wr,size,mask,data,error); + Workspace::dBusAccess(addr,wr,size,dataBytes,error); } virtual void onStdout(char c){ @@ -4058,6 +4084,30 @@ int main(int argc, char **argv, char **env) { printf("BOOT\n"); timespec startedAt = timer_start(); + +#ifdef LINUX_SOC_SMP + { + + LinuxSocSmp soc("linuxSmp"); + #ifndef DEBUG_PLUGIN_EXTERNAL + soc.withRiscvRef(); + soc.loadBin(EMULATOR, 0x80000000); + soc.loadBin(VMLINUX, 0x80400000); + soc.loadBin(DTB, 0x80FF0000); + soc.loadBin(RAMDISK, 0x81000000); + #endif + //soc.setIStall(true); + //soc.setDStall(true); + soc.bootAt(0x80000000); + soc.run(0); +// soc.run((496300000l + 2000000) / 2); +// soc.run(438700000l/2); + return -1; + } +#endif + + + #ifdef RVF for(const string &name : riscvTestFloat){ redo(REDO,RiscvTest(name).withRiscvRef()->bootAt(0x80000188u)->writeWord(0x80000184u, 0x00305073)->run();) @@ -4141,27 +4191,6 @@ int main(int argc, char **argv, char **env) { #endif -#ifdef LINUX_SOC_SMP - { - - LinuxSocSmp soc("linuxSmp"); - #ifndef DEBUG_PLUGIN_EXTERNAL - soc.withRiscvRef(); - soc.loadBin(EMULATOR, 0x80000000); - soc.loadBin(VMLINUX, 0xC0000000); - soc.loadBin(DTB, 0xC4000000); - soc.loadBin(RAMDISK, 0xC2000000); - #endif - //soc.setIStall(true); - //soc.setDStall(true); - soc.bootAt(0x80000000); - soc.run(0); -// soc.run((496300000l + 2000000) / 2); -// soc.run(438700000l/2); - return -1; - } -#endif - diff --git a/src/test/cpp/regression/makefile b/src/test/cpp/regression/makefile index 19bee06..b8759c9 100644 --- a/src/test/cpp/regression/makefile +++ b/src/test/cpp/regression/makefile @@ -150,6 +150,15 @@ ifneq ($(EXTERNAL_INTERRUPT),no) endif endif +ifneq ($(shell grep utime ${VEXRISCV_FILE} -w),) + ADDCFLAGS += -CFLAGS -DUTIME_INPUT +endif + +ifneq ($(shell grep dBus_rsp_payload_aggregated ${VEXRISCV_FILE} -w),) + ADDCFLAGS += -CFLAGS -DDBUS_AGGREGATION +endif + + ifneq ($(RUN_HEX),no) ADDCFLAGS += -CFLAGS -DRUN_HEX='\"$(RUN_HEX)\"' endif diff --git a/src/test/scala/vexriscv/ip/fpu/FpuTest.scala b/src/test/scala/vexriscv/ip/fpu/FpuTest.scala index 163460d..a0c97a0 100644 --- a/src/test/scala/vexriscv/ip/fpu/FpuTest.scala +++ b/src/test/scala/vexriscv/ip/fpu/FpuTest.scala @@ -55,11 +55,11 @@ class FpuTest extends FunSuite{ } def testP(p : FpuParameter){ - val portCount = 4 + val portCount = 1 val config = SimConfig config.allOptimisation -// if(p.withDouble) config.withFstWave + if(p.withDouble) config.withFstWave config.compile(new FpuCore(portCount, p){ for(i <- 0 until portCount) out(Bits(5 bits)).setName(s"flagAcc$i") := io.port(i).completion.flags.asBits setDefinitionName("FpuCore"+ (if(p.withDouble) "Double" else "")) @@ -1286,6 +1286,17 @@ class FpuTest extends FunSuite{ //TODO double <-> simple convertions if(p.withDouble) { + load(0, 1.0) + load(0, 2.0) + load(0, 2.5) + load(0, 0.75) + load(0, -5) + load(0, 0) + load(0, Double.PositiveInfinity) + load(0, Double.NaN) + dut.clockDomain.waitSampling(200) + simSuccess() + for(_ <- 0 until 10000) testSgnjF64() println("f64 sgnj done")