From a9d8c0a19f04412914d86b0bd9d77e09bb8233ec Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Mon, 18 Jan 2021 11:38:26 +0100 Subject: [PATCH] fpu wip --- src/main/scala/vexriscv/Riscv.scala | 58 +++ src/main/scala/vexriscv/TestsWorkspace.scala | 340 +++++++++--------- src/main/scala/vexriscv/VexRiscv.scala | 1 + src/main/scala/vexriscv/demo/GenFull.scala | 146 ++++---- src/main/scala/vexriscv/ip/DataCache.scala | 37 +- src/main/scala/vexriscv/ip/fpu/FpuCore.scala | 90 ++--- .../scala/vexriscv/ip/fpu/Interface.scala | 16 +- .../vexriscv/plugin/DBusCachedPlugin.scala | 74 +++- .../scala/vexriscv/plugin/FpuPlugin.scala | 124 +++++++ src/test/cpp/raw/common/asm.mk | 3 + src/test/cpp/raw/fpu/.gitignore | 4 + src/test/cpp/raw/fpu/build/amo.asm | 247 +++++++++++++ src/test/cpp/raw/fpu/build/amo.hex | 45 +++ src/test/cpp/raw/fpu/build/fpu.asm | 224 ++++++++++++ src/test/cpp/raw/fpu/build/fpu.hex | 42 +++ src/test/cpp/raw/fpu/makefile | 5 + src/test/cpp/raw/fpu/src/crt.S | 161 +++++++++ src/test/cpp/raw/fpu/src/ld | 16 + src/test/cpp/regression/main.cpp | 4 +- src/test/cpp/regression/makefile | 5 + src/test/scala/vexriscv/DhrystoneBench.scala | 199 +++++----- src/test/scala/vexriscv/ip/fpu/FpuTest.scala | 53 ++- 22 files changed, 1446 insertions(+), 448 deletions(-) create mode 100644 src/main/scala/vexriscv/plugin/FpuPlugin.scala create mode 100644 src/test/cpp/raw/fpu/.gitignore create mode 100644 src/test/cpp/raw/fpu/build/amo.asm create mode 100644 src/test/cpp/raw/fpu/build/amo.hex create mode 100644 src/test/cpp/raw/fpu/build/fpu.asm create mode 100644 src/test/cpp/raw/fpu/build/fpu.hex create mode 100644 src/test/cpp/raw/fpu/makefile create mode 100644 src/test/cpp/raw/fpu/src/crt.S create mode 100644 src/test/cpp/raw/fpu/src/ld diff --git a/src/main/scala/vexriscv/Riscv.scala b/src/main/scala/vexriscv/Riscv.scala index ee9be3d..ae64bff 100644 --- a/src/main/scala/vexriscv/Riscv.scala +++ b/src/main/scala/vexriscv/Riscv.scala @@ -11,6 +11,7 @@ object Riscv{ def funct3Range = 14 downto 12 def rs1Range = 19 downto 15 def rs2Range = 24 downto 20 + def rs3Range = 31 downto 27 def csrRange = 31 downto 20 case class IMM(instruction : Bits) extends Area{ @@ -119,6 +120,63 @@ object Riscv{ def FENCE_I = M"-----------------001-----0001111" def SFENCE_VMA = M"0001001----------000000001110011" + def FADD_S = M"0000000------------------1010011" + def FSUB_S = M"0000100------------------1010011" + def FMUL_S = M"0001000------------------1010011" + def FDIV_S = M"0001100------------------1010011" + def FSGNJ_S = M"0010000----------000-----1010011" + def FSGNJN_S = M"0010000----------001-----1010011" + def FSGNJX_S = M"0010000----------010-----1010011" + def FMIN_S = M"0010100----------000-----1010011" + def FMAX_S = M"0010100----------001-----1010011" + def FSQRT_S = M"010110000000-------------1010011" + def FADD_D = M"0000001------------------1010011" + def FSUB_D = M"0000101------------------1010011" + def FMUL_D = M"0001001------------------1010011" + def FDIV_D = M"0001101------------------1010011" + def FSGNJ_D = M"0010001----------000-----1010011" + def FSGNJN_D = M"0010001----------001-----1010011" + def FSGNJX_D = M"0010001----------010-----1010011" + def FMIN_D = M"0010101----------000-----1010011" + def FMAX_D = M"0010101----------001-----1010011" + def FCVT_S_D = M"010000000001-------------1010011" + def FCVT_D_S = M"010000100000-------------1010011" + def FSQRT_D = M"010110100000-------------1010011" + def FCVT_W_S = M"110000000000-------------1010011" + def FCVT_WU_S = M"110000000001-------------1010011" + def FCVT_L_S = M"110000000010-------------1010011" + def FCVT_LU_S = M"110000000011-------------1010011" + def FMV_X_W = M"111000000000-----000-----1010011" + def FCLASS_S = M"111000000000-----001-----1010011" + def FCVT_W_D = M"110000100000-------------1010011" + def FCVT_WU_D = M"110000100001-------------1010011" + def FCVT_L_D = M"110000100010-------------1010011" + def FCVT_LU_D = M"110000100011-------------1010011" + def FMV_X_D = M"111000100000-----000-----1010011" + def FCLASS_D = M"111000100000-----001-----1010011" + def FCVT_S_W = M"110100000000-------------1010011" + def FCVT_S_WU = M"110100000001-------------1010011" + def FCVT_S_L = M"110100000010-------------1010011" + def FCVT_S_LU = M"110100000011-------------1010011" + def FMV_W_X = M"111100000000-----000-----1010011" + def FCVT_D_W = M"110100100000-------------1010011" + def FCVT_D_WU = M"110100100001-------------1010011" + def FCVT_D_L = M"110100100010-------------1010011" + def FCVT_D_LU = M"110100100011-------------1010011" + def FMV_D_X = M"111100100000-----000-----1010011" + def FLW = M"-----------------010-----0000111" + def FLD = M"-----------------011-----0000111" + def FSW = M"-----------------010-----0100111" + def FSD = M"-----------------011-----0100111" + def FMADD_S = M"-----00------------------1000011" + def FMSUB_S = M"-----00------------------1000111" + def FNMSUB_S = M"-----00------------------1001011" + def FNMADD_S = M"-----00------------------1001111" + def FMADD_D = M"-----01------------------1000011" + def FMSUB_D = M"-----01------------------1000111" + def FNMSUB_D = M"-----01------------------1001011" + def FNMADD_D = M"-----01------------------1001111" + object CSR{ def MVENDORID = 0xF11 // MRO Vendor ID. def MARCHID = 0xF12 // MRO Architecture ID. diff --git a/src/main/scala/vexriscv/TestsWorkspace.scala b/src/main/scala/vexriscv/TestsWorkspace.scala index b522aed..3fc35b8 100644 --- a/src/main/scala/vexriscv/TestsWorkspace.scala +++ b/src/main/scala/vexriscv/TestsWorkspace.scala @@ -19,180 +19,181 @@ package vexriscv import vexriscv.plugin._ -import vexriscv.demo.SimdAddPlugin +import vexriscv.demo.{GenFull, SimdAddPlugin} import spinal.core._ import spinal.lib._ import vexriscv.ip._ import spinal.lib.bus.avalon.AvalonMM import spinal.lib.eda.altera.{InterruptReceiverTag, ResetEmitterTag} +import vexriscv.ip.fpu.FpuParameter // make clean all SEED=42 MMU=no STOP_ON_ERROR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes SUPERVISOR=yes REDO=1 DHRYSTONE=yes LRSC=yes AMO=yes LINUX_REGRESSION=yes TRACE=yes TRACE_START=1000000000 FLOW_INFO=ye IBUS_DATA_WIDTH=128 DBUS_DATA_WIDTH=128 //make clean all SEED=42 MMU=no STOP_ON_ERROR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes SUPERVISOR=yes REDO=1 DHRYSTONE=yes LRSC=yes AMO=yes TRACE=yes TRACE_START=1000000000 FLOW_INFO=ye IBUS_DATA_WIDTH=128 DBUS_DATA_WIDTH=128 LINUX_SOC_SMP=yes VMLINUX=../../../../../buildroot/output/images/Image RAMDISK=../../../../../buildroot/output/images/rootfs.cpio DTB=../../../../../buildroot/output/images/dtb EMULATOR=../../../../../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_jump.bin object TestsWorkspace { def main(args: Array[String]) { - def configFull = { - val config = VexRiscvConfig( - plugins = List( - new MmuPlugin( - ioRange = x => x(31 downto 28) === 0xF - ), - //Uncomment the whole IBusSimplePlugin and comment IBusCachedPlugin if you want uncached iBus config - // new IBusSimplePlugin( - // resetVector = 0x80000000l, - // cmdForkOnSecondStage = false, - // cmdForkPersistence = false, - // prediction = DYNAMIC_TARGET, - // historyRamSizeLog2 = 10, - // catchAccessFault = true, - // compressedGen = true, - // busLatencyMin = 1, - // injectorStage = true, - // memoryTranslatorPortConfig = withMmu generate MmuPortConfig( - // portTlbSize = 4 - // ) - // ), - - //Uncomment the whole IBusCachedPlugin and comment IBusSimplePlugin if you want cached iBus config - new IBusCachedPlugin( - resetVector = 0x80000000l, - compressedGen = false, - prediction = STATIC, - injectorStage = false, - config = InstructionCacheConfig( - cacheSize = 4096*2, - bytePerLine = 64, - wayCount = 2, - addressWidth = 32, - cpuDataWidth = 32, - memDataWidth = 128, - catchIllegalAccess = true, - catchAccessFault = true, - asyncTagMemory = false, - twoCycleRam = true, - twoCycleCache = true, - reducedBankWidth = true - // ) - ), - memoryTranslatorPortConfig = MmuPortConfig( - portTlbSize = 4, - latency = 1, - earlyRequireMmuLockup = true, - earlyCacheHits = true - ) - ), - // ).newTightlyCoupledPort(TightlyCoupledPortParameter("iBusTc", a => a(30 downto 28) === 0x0 && a(5))), - // new DBusSimplePlugin( - // catchAddressMisaligned = true, - // catchAccessFault = true, - // earlyInjection = false, - // withLrSc = true, - // memoryTranslatorPortConfig = withMmu generate MmuPortConfig( - // portTlbSize = 4 - // ) - // ), - new DBusCachedPlugin( - dBusCmdMasterPipe = true, - dBusCmdSlavePipe = true, - dBusRspSlavePipe = true, - config = new DataCacheConfig( - cacheSize = 4096*1, - bytePerLine = 64, - wayCount = 1, - addressWidth = 32, - cpuDataWidth = 32, - memDataWidth = 128, - catchAccessError = true, - catchIllegal = true, - catchUnaligned = true, - withLrSc = true, - withAmo = true, - withExclusive = true, - withInvalidate = true, - pendingMax = 32 - // ) - ), - memoryTranslatorPortConfig = MmuPortConfig( - portTlbSize = 4, - latency = 1, - earlyRequireMmuLockup = true, - earlyCacheHits = true - ) - ), - - // new MemoryTranslatorPlugin( - // tlbSize = 32, - // virtualRange = _(31 downto 28) === 0xC, - // ioRange = _(31 downto 28) === 0xF - // ), - - new DecoderSimplePlugin( - catchIllegalInstruction = true - ), - new RegFilePlugin( - regFileReadyKind = plugin.ASYNC, - zeroBoot = true - ), - new IntAluPlugin, - new SrcPlugin( - separatedAddSub = false - ), - new FullBarrelShifterPlugin(earlyInjection = false), - // new LightShifterPlugin, - new HazardSimplePlugin( - bypassExecute = true, - bypassMemory = true, - bypassWriteBack = true, - bypassWriteBackBuffer = true, - pessimisticUseSrc = false, - pessimisticWriteRegFile = false, - pessimisticAddressMatch = false - ), - // new HazardSimplePlugin(false, true, false, true), - // new HazardSimplePlugin(false, false, false, false), - new MulPlugin, - new MulDivIterativePlugin( - genMul = false, - genDiv = true, - mulUnrollFactor = 32, - divUnrollFactor = 1 - ), - // new DivPlugin, - new CsrPlugin(CsrPluginConfig.all2(0x80000020l).copy(ebreakGen = false, misaExtensionsInit = Riscv.misaToInt("imas"))), - // new CsrPlugin(//CsrPluginConfig.all2(0x80000020l).copy(ebreakGen = true)/* - // CsrPluginConfig( - // catchIllegalAccess = false, - // mvendorid = null, - // marchid = null, - // mimpid = null, - // mhartid = null, - // misaExtensionsInit = 0, - // misaAccess = CsrAccess.READ_ONLY, - // mtvecAccess = CsrAccess.WRITE_ONLY, - // mtvecInit = 0x80000020l, - // mepcAccess = CsrAccess.READ_WRITE, - // mscratchGen = true, - // mcauseAccess = CsrAccess.READ_ONLY, - // mbadaddrAccess = CsrAccess.READ_ONLY, - // mcycleAccess = CsrAccess.NONE, - // minstretAccess = CsrAccess.NONE, - // ecallGen = true, - // ebreakGen = true, - // wfiGenAsWait = false, - // wfiGenAsNop = true, - // ucycleAccess = CsrAccess.NONE - // )), - new DebugPlugin(ClockDomain.current.clone(reset = Bool().setName("debugReset"))), - new BranchPlugin( - earlyBranch = false, - catchAddressMisaligned = true, - fenceiGenAsAJump = false - ), - new YamlPlugin("cpu0.yaml") - ) - ) - config - } +// def configFull = { +// val config = VexRiscvConfig( +// plugins = List( +// new MmuPlugin( +// ioRange = x => x(31 downto 28) === 0xF +// ), +// //Uncomment the whole IBusSimplePlugin and comment IBusCachedPlugin if you want uncached iBus config +// // new IBusSimplePlugin( +// // resetVector = 0x80000000l, +// // cmdForkOnSecondStage = false, +// // cmdForkPersistence = false, +// // prediction = DYNAMIC_TARGET, +// // historyRamSizeLog2 = 10, +// // catchAccessFault = true, +// // compressedGen = true, +// // busLatencyMin = 1, +// // injectorStage = true, +// // memoryTranslatorPortConfig = withMmu generate MmuPortConfig( +// // portTlbSize = 4 +// // ) +// // ), +// +// //Uncomment the whole IBusCachedPlugin and comment IBusSimplePlugin if you want cached iBus config +// new IBusCachedPlugin( +// resetVector = 0x80000000l, +// compressedGen = false, +// prediction = STATIC, +// injectorStage = false, +// config = InstructionCacheConfig( +// cacheSize = 4096*2, +// bytePerLine = 64, +// wayCount = 2, +// addressWidth = 32, +// cpuDataWidth = 32, +// memDataWidth = 128, +// catchIllegalAccess = true, +// catchAccessFault = true, +// asyncTagMemory = false, +// twoCycleRam = true, +// twoCycleCache = true, +// reducedBankWidth = true +// // ) +// ), +// memoryTranslatorPortConfig = MmuPortConfig( +// portTlbSize = 4, +// latency = 1, +// earlyRequireMmuLockup = true, +// earlyCacheHits = true +// ) +// ), +// // ).newTightlyCoupledPort(TightlyCoupledPortParameter("iBusTc", a => a(30 downto 28) === 0x0 && a(5))), +// // new DBusSimplePlugin( +// // catchAddressMisaligned = true, +// // catchAccessFault = true, +// // earlyInjection = false, +// // withLrSc = true, +// // memoryTranslatorPortConfig = withMmu generate MmuPortConfig( +// // portTlbSize = 4 +// // ) +// // ), +// new DBusCachedPlugin( +// dBusCmdMasterPipe = true, +// dBusCmdSlavePipe = true, +// dBusRspSlavePipe = true, +// config = new DataCacheConfig( +// cacheSize = 4096*1, +// bytePerLine = 64, +// wayCount = 1, +// addressWidth = 32, +// cpuDataWidth = 32, +// memDataWidth = 128, +// catchAccessError = true, +// catchIllegal = true, +// catchUnaligned = true, +// withLrSc = true, +// withAmo = true, +// withExclusive = true, +// withInvalidate = true, +// pendingMax = 32 +// // ) +// ), +// memoryTranslatorPortConfig = MmuPortConfig( +// portTlbSize = 4, +// latency = 1, +// earlyRequireMmuLockup = true, +// earlyCacheHits = true +// ) +// ), +// +// // new MemoryTranslatorPlugin( +// // tlbSize = 32, +// // virtualRange = _(31 downto 28) === 0xC, +// // ioRange = _(31 downto 28) === 0xF +// // ), +// +// new DecoderSimplePlugin( +// catchIllegalInstruction = true +// ), +// new RegFilePlugin( +// regFileReadyKind = plugin.ASYNC, +// zeroBoot = true +// ), +// new IntAluPlugin, +// new SrcPlugin( +// separatedAddSub = false +// ), +// new FullBarrelShifterPlugin(earlyInjection = false), +// // new LightShifterPlugin, +// new HazardSimplePlugin( +// bypassExecute = true, +// bypassMemory = true, +// bypassWriteBack = true, +// bypassWriteBackBuffer = true, +// pessimisticUseSrc = false, +// pessimisticWriteRegFile = false, +// pessimisticAddressMatch = false +// ), +// // new HazardSimplePlugin(false, true, false, true), +// // new HazardSimplePlugin(false, false, false, false), +// new MulPlugin, +// new MulDivIterativePlugin( +// genMul = false, +// genDiv = true, +// mulUnrollFactor = 32, +// divUnrollFactor = 1 +// ), +// // new DivPlugin, +// new CsrPlugin(CsrPluginConfig.all2(0x80000020l).copy(ebreakGen = false, misaExtensionsInit = Riscv.misaToInt("imas"))), +// // new CsrPlugin(//CsrPluginConfig.all2(0x80000020l).copy(ebreakGen = true)/* +// // CsrPluginConfig( +// // catchIllegalAccess = false, +// // mvendorid = null, +// // marchid = null, +// // mimpid = null, +// // mhartid = null, +// // misaExtensionsInit = 0, +// // misaAccess = CsrAccess.READ_ONLY, +// // mtvecAccess = CsrAccess.WRITE_ONLY, +// // mtvecInit = 0x80000020l, +// // mepcAccess = CsrAccess.READ_WRITE, +// // mscratchGen = true, +// // mcauseAccess = CsrAccess.READ_ONLY, +// // mbadaddrAccess = CsrAccess.READ_ONLY, +// // mcycleAccess = CsrAccess.NONE, +// // minstretAccess = CsrAccess.NONE, +// // ecallGen = true, +// // ebreakGen = true, +// // wfiGenAsWait = false, +// // wfiGenAsNop = true, +// // ucycleAccess = CsrAccess.NONE +// // )), +// new DebugPlugin(ClockDomain.current.clone(reset = Bool().setName("debugReset"))), +// new BranchPlugin( +// earlyBranch = false, +// catchAddressMisaligned = true, +// fenceiGenAsAJump = false +// ), +// new YamlPlugin("cpu0.yaml") +// ) +// ) +// config +// } // import spinal.core.sim._ @@ -211,10 +212,17 @@ object TestsWorkspace { // } // } - SpinalConfig(mergeAsyncProcess = false, anonymSignalPrefix = "zz_").generateVerilog { + SpinalConfig().generateVerilog { - - val toplevel = new VexRiscv(configFull) + val config = GenFull.config + config.plugins += new FpuPlugin( + externalFpu = false, + p = FpuParameter( + internalMantissaSize = 23, + withDouble = false + ) + ) + val toplevel = new VexRiscv(config) // val toplevel = new VexRiscv(configLight) // val toplevel = new VexRiscv(configTest) diff --git a/src/main/scala/vexriscv/VexRiscv.scala b/src/main/scala/vexriscv/VexRiscv.scala index 5f7865c..a08f9c9 100644 --- a/src/main/scala/vexriscv/VexRiscv.scala +++ b/src/main/scala/vexriscv/VexRiscv.scala @@ -46,6 +46,7 @@ case class VexRiscvConfig(){ object LEGAL_INSTRUCTION extends Stageable(Bool) object REGFILE_WRITE_VALID extends Stageable(Bool) object REGFILE_WRITE_DATA extends Stageable(Bits(32 bits)) + object DBUS_DATA extends Stageable(Bits(32 bits)) object MPP extends PipelineThing[UInt] object DEBUG_BYPASS_CACHE extends PipelineThing[Bool] diff --git a/src/main/scala/vexriscv/demo/GenFull.scala b/src/main/scala/vexriscv/demo/GenFull.scala index dfa7b1e..eb1dba3 100644 --- a/src/main/scala/vexriscv/demo/GenFull.scala +++ b/src/main/scala/vexriscv/demo/GenFull.scala @@ -9,82 +9,84 @@ import spinal.core._ * Created by spinalvm on 15.06.17. */ object GenFull extends App{ - def cpu() = new VexRiscv( - config = VexRiscvConfig( - plugins = List( - new IBusCachedPlugin( - prediction = DYNAMIC, - config = InstructionCacheConfig( - cacheSize = 4096, - bytePerLine =32, - wayCount = 1, - addressWidth = 32, - cpuDataWidth = 32, - memDataWidth = 32, - catchIllegalAccess = true, - catchAccessFault = true, - asyncTagMemory = false, - twoCycleRam = true, - twoCycleCache = true - ), - memoryTranslatorPortConfig = MmuPortConfig( - portTlbSize = 4 - ) + def config = VexRiscvConfig( + plugins = List( + new IBusCachedPlugin( + prediction = DYNAMIC, + config = InstructionCacheConfig( + cacheSize = 4096, + bytePerLine =32, + wayCount = 1, + addressWidth = 32, + cpuDataWidth = 32, + memDataWidth = 32, + catchIllegalAccess = true, + catchAccessFault = true, + asyncTagMemory = false, + twoCycleRam = true, + twoCycleCache = true ), - new DBusCachedPlugin( - config = new DataCacheConfig( - cacheSize = 4096, - bytePerLine = 32, - wayCount = 1, - addressWidth = 32, - cpuDataWidth = 32, - memDataWidth = 32, - catchAccessError = true, - catchIllegal = true, - catchUnaligned = true - ), - memoryTranslatorPortConfig = MmuPortConfig( - portTlbSize = 6 - ) + memoryTranslatorPortConfig = MmuPortConfig( + portTlbSize = 4 + ) + ), + new DBusCachedPlugin( + config = new DataCacheConfig( + cacheSize = 4096, + bytePerLine = 32, + wayCount = 1, + addressWidth = 32, + cpuDataWidth = 32, + memDataWidth = 32, + catchAccessError = true, + catchIllegal = true, + catchUnaligned = true ), - new MmuPlugin( - virtualRange = _(31 downto 28) === 0xC, - ioRange = _(31 downto 28) === 0xF - ), - new DecoderSimplePlugin( - catchIllegalInstruction = true - ), - new RegFilePlugin( - regFileReadyKind = plugin.SYNC, - zeroBoot = false - ), - new IntAluPlugin, - new SrcPlugin( - separatedAddSub = false, - executeInsertion = true - ), - new FullBarrelShifterPlugin, - new HazardSimplePlugin( - bypassExecute = true, - bypassMemory = true, - bypassWriteBack = true, - bypassWriteBackBuffer = true, - pessimisticUseSrc = false, - pessimisticWriteRegFile = false, - pessimisticAddressMatch = false - ), - new MulPlugin, - new DivPlugin, - new CsrPlugin(CsrPluginConfig.small(0x80000020l)), - new DebugPlugin(ClockDomain.current.clone(reset = Bool().setName("debugReset"))), - new BranchPlugin( - earlyBranch = false, - catchAddressMisaligned = true - ), - new YamlPlugin("cpu0.yaml") - ) + memoryTranslatorPortConfig = MmuPortConfig( + portTlbSize = 6 + ) + ), + new MmuPlugin( + virtualRange = _(31 downto 28) === 0xC, + ioRange = _(31 downto 28) === 0xF + ), + new DecoderSimplePlugin( + catchIllegalInstruction = true + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + zeroBoot = false + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false, + executeInsertion = true + ), + new FullBarrelShifterPlugin, + new HazardSimplePlugin( + bypassExecute = true, + bypassMemory = true, + bypassWriteBack = true, + bypassWriteBackBuffer = true, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + new MulPlugin, + new DivPlugin, + new CsrPlugin(CsrPluginConfig.small(0x80000020l)), + new DebugPlugin(ClockDomain.current.clone(reset = Bool().setName("debugReset"))), + new BranchPlugin( + earlyBranch = false, + catchAddressMisaligned = true + ), + new YamlPlugin("cpu0.yaml") ) ) + def cpu() = new VexRiscv( + config + ) + SpinalVerilog(cpu()) } diff --git a/src/main/scala/vexriscv/ip/DataCache.scala b/src/main/scala/vexriscv/ip/DataCache.scala index eff1097..97f803c 100644 --- a/src/main/scala/vexriscv/ip/DataCache.scala +++ b/src/main/scala/vexriscv/ip/DataCache.scala @@ -120,7 +120,6 @@ case class DataCacheCpuExecute(p : DataCacheConfig) extends Bundle with IMasterS case class DataCacheCpuExecuteArgs(p : DataCacheConfig) extends Bundle{ val wr = Bool - val data = Bits(p.cpuDataWidth bit) val size = UInt(2 bits) val isLrsc = p.withLrSc generate Bool() val isAmo = p.withAmo generate Bool() @@ -169,6 +168,7 @@ case class DataCacheCpuWriteBack(p : DataCacheConfig) extends Bundle with IMaste val isUser = Bool() val haltIt = Bool() val isWrite = Bool() + val storeData = Bits(p.cpuDataWidth bit) val data = Bits(p.cpuDataWidth bit) val address = UInt(p.addressWidth bit) val mmuException, unalignedAccess, accessError = Bool() @@ -176,7 +176,7 @@ case class DataCacheCpuWriteBack(p : DataCacheConfig) extends Bundle with IMaste val fence = FenceFlags() override def asMaster(): Unit = { - out(isValid,isStuck,isUser, address, fence) + out(isValid,isStuck,isUser, address, fence, storeData) in(haltIt, data, mmuException, unalignedAccess, accessError, isWrite, keepMemRspData) } } @@ -804,35 +804,32 @@ class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParam val ioMemRspMuxed = io.mem.rsp.data.subdivideIn(cpuDataWidth bits).read(io.cpu.writeBack.address(memWordToCpuWordRange)) - io.cpu.writeBack.haltIt := io.cpu.writeBack.isValid + io.cpu.writeBack.haltIt := True //Evict the cache after reset logics val flusher = new Area { - val valid = RegInit(False) + val waitDone = RegInit(False) clearWhen(io.cpu.flush.ready) val hold = False - when(valid) { - tagsWriteCmd.valid := valid - tagsWriteCmd.address := mmuRsp.physicalAddress(lineRange) + val counter = Reg(UInt(lineRange.size + 1 bits)) init(0) + when(!counter.msb) { + tagsWriteCmd.valid := True + tagsWriteCmd.address := counter.resized tagsWriteCmd.way.setAll() tagsWriteCmd.data.valid := False - io.cpu.writeBack.haltIt := True + io.cpu.execute.haltIt := True when(!hold) { - when(mmuRsp.physicalAddress(lineRange) =/= wayLineCount - 1) { - mmuRsp.physicalAddress.getDrivingReg(lineRange) := mmuRsp.physicalAddress(lineRange) + 1 - } otherwise { - valid := False - } + counter := counter + 1 } } - io.cpu.flush.ready := False + io.cpu.flush.ready := waitDone && counter.msb + val start = RegInit(True) //Used to relax timings - start := !start && io.cpu.flush.valid && !io.cpu.execute.isValid && !io.cpu.memory.isValid && !io.cpu.writeBack.isValid && !io.cpu.redo + start := !waitDone && !start && io.cpu.flush.valid && !io.cpu.execute.isValid && !io.cpu.memory.isValid && !io.cpu.writeBack.isValid && !io.cpu.redo when(start){ - io.cpu.flush.ready := True - mmuRsp.physicalAddress.getDrivingReg(lineRange) := 0 - valid := True + waitDone := True + counter := 0 } } @@ -848,10 +845,10 @@ class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParam val isExternalLsrc = if(withExternalLrSc) request.isLrsc else False val isExternalAmo = if(withExternalAmo) request.isAmo else False - val requestDataBypass = CombInit(request.data) + val requestDataBypass = CombInit(io.cpu.writeBack.storeData) import DataCacheExternalAmoStates._ val amo = withAmo generate new Area{ - def rf = request.data + def rf = io.cpu.writeBack.storeData def mem = if(withInternalAmo) dataMux else ioMemRspMuxed val compare = request.amoCtrl.alu.msb val unsigned = request.amoCtrl.alu(2 downto 1) === B"11" diff --git a/src/main/scala/vexriscv/ip/fpu/FpuCore.scala b/src/main/scala/vexriscv/ip/fpu/FpuCore.scala index c64ddb3..c95f4ae 100644 --- a/src/main/scala/vexriscv/ip/fpu/FpuCore.scala +++ b/src/main/scala/vexriscv/ip/fpu/FpuCore.scala @@ -10,28 +10,31 @@ object FpuDivSqrtIterationState extends SpinalEnum{ val IDLE, YY, XYY, Y2_XYY, DIV, _15_XYY2, Y_15_XYY2, Y_15_XYY2_RESULT, SQRT = newElement() } -case class FpuCore(p : FpuParameter) extends Component{ +case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ val io = new Bundle { - val port = slave(FpuPort(p)) + val port = Vec(slave(FpuPort(p)), portCount) } + val portCountWidth = log2Up(portCount) + val Source = HardType(UInt(portCountWidth bits)) -// val commitPerSourceCount = 8 + +// val commitPerportCount = 8 val rfLockCount = 5 val lockIdType = HardType(UInt(log2Up(rfLockCount) bits)) - io.port.rsp.valid := False - io.port.rsp.payload.assignDontCare() +// io.port.rsp.valid := False +// io.port.rsp.payload.assignDontCare() case class RfReadInput() extends Bundle{ - val source = p.source() + val source = Source() val opcode = p.Opcode() val rs1, rs2, rs3 = p.rfAddress() val rd = p.rfAddress() } case class RfReadOutput() extends Bundle{ - val source = p.source() + val source = Source() val opcode = p.Opcode() val lockId = lockIdType() val rs1, rs2, rs3 = p.internalFloating() @@ -40,19 +43,19 @@ case class FpuCore(p : FpuParameter) extends Component{ case class LoadInput() extends Bundle{ - val source = p.source() + val source = Source() val rs1 = p.internalFloating() val rd = p.rfAddress() val lockId = lockIdType() } case class StoreInput() extends Bundle{ - val source = p.source() + val source = Source() val rs2 = p.internalFloating() } case class MulInput() extends Bundle{ - val source = p.source() + val source = Source() val rs1, rs2, rs3 = p.internalFloating() val rd = p.rfAddress() val lockId = lockIdType() @@ -63,7 +66,7 @@ case class FpuCore(p : FpuParameter) extends Component{ } case class DivSqrtInput() extends Bundle{ - val source = p.source() + val source = Source() val rs1, rs2 = p.internalFloating() val rd = p.rfAddress() val lockId = lockIdType() @@ -71,14 +74,14 @@ case class FpuCore(p : FpuParameter) extends Component{ } case class AddInput() extends Bundle{ - val source = p.source() + val source = Source() val rs1, rs2 = p.internalFloating() val rd = p.rfAddress() val lockId = lockIdType() } case class WriteInput() extends Bundle{ - val source = p.source() + val source = Source() val lockId = lockIdType() val rd = p.rfAddress() val value = p.internalFloating() @@ -86,10 +89,10 @@ case class FpuCore(p : FpuParameter) extends Component{ val rf = new Area{ - val ram = Mem(p.internalFloating, 32*(1 << p.sourceWidth)) + val ram = Mem(p.internalFloating, 32*portCount) val lock = for(i <- 0 until rfLockCount) yield new Area{ val valid = RegInit(False) - val source = Reg(p.source) + val source = Reg(Source()) val address = Reg(p.rfAddress) val id = Reg(UInt(log2Up(rfLockCount) bits)) val commited = Reg(Bool) @@ -99,21 +102,21 @@ case class FpuCore(p : FpuParameter) extends Component{ val lockFreeId = OHMasking.first(lock.map(!_.valid)) } - val commitLogic = for(source <- 0 until p.sourceCount) yield new Area{ + val commitLogic = for(source <- 0 until portCount) yield new Area{ val fire = False val target, hit = Reg(UInt(log2Up(rfLockCount) bits)) init(0) when(fire){ hit := hit + 1 } - io.port.commit(source).ready := False - when(io.port.commit(source).valid) { + io.port(source).commit.ready := False + when(io.port(source).commit.valid) { for (lock <- rf.lock) { when(lock.valid && lock.source === source && lock.id === hit) { fire := True lock.commited := True - lock.write := io.port.commit(source).write - io.port.commit(source).ready := True + lock.write := io.port(source).commit.write + io.port(source).commit.ready := True } } } @@ -123,16 +126,20 @@ case class FpuCore(p : FpuParameter) extends Component{ // val valid = Bool() // val write = Bool() // } -// val commits = for(i <- 0 until p.sourceCount) yield new Area{ -// val lines = Vec(CommitLine(), commitPerSourceCount) +// val commits = for(i <- 0 until portCount) yield new Area{ +// val lines = Vec(CommitLine(), commitPerportCount) // lines.foreach(_.valid init(False)) // // } val read = new Area{ + val arbiter = StreamArbiterFactory.noLock.lowerFirst.build(FpuCmd(p), portCount) + arbiter.io.inputs <> Vec(io.port.map(_.cmd)) + val s0 = Stream(RfReadInput()) - s0.arbitrationFrom(io.port.cmd) - s0.payload.assignSomeByName(io.port.cmd.payload) + s0.arbitrationFrom(arbiter.io.output) + s0.source := arbiter.io.chosen + s0.payload.assignSomeByName(arbiter.io.output.payload) val useRs1, useRs2, useRs3, useRd = False switch(s0.opcode){ @@ -172,7 +179,7 @@ case class FpuCore(p : FpuParameter) extends Component{ val hits = List((useRs1, s0.rs1), (useRs2, s0.rs2), (useRs3, s0.rs3), (useRd, s0.rd)).map{case (use, reg) => use && rf.lock.map(l => l.valid && l.source === s0.source && l.address === reg).orR} val hazard = hits.orR when(s0.fire && useRd){ - for(i <- 0 until p.sourceCount){ + for(i <- 0 until portCount){ when(s0.source === i){ commitLogic(i).target := commitLogic(i).target + 1 } @@ -183,6 +190,7 @@ case class FpuCore(p : FpuParameter) extends Component{ rf.lock(i).source := s0.source rf.lock(i).address := s0.rd rf.lock(i).id := commitLogic.map(_.target).read(s0.source) + rf.lock(i).commited := False } } } @@ -194,9 +202,9 @@ case class FpuCore(p : FpuParameter) extends Component{ output.opcode := s1.opcode output.lockId := s1LockId output.rd := s1.rd - output.rs1 := rf.ram.readSync(s0.rs1,enable = !output.isStall) - output.rs2 := rf.ram.readSync(s0.rs2,enable = !output.isStall) - output.rs3 := rf.ram.readSync(s0.rs3,enable = !output.isStall) + output.rs1 := rf.ram.readSync(s0.source @@ s0.rs1,enable = !output.isStall) + output.rs2 := rf.ram.readSync(s0.source @@ s0.rs2,enable = !output.isStall) + output.rs3 := rf.ram.readSync(s0.source @@ s0.rs3,enable = !output.isStall) } val decode = new Area{ @@ -266,10 +274,10 @@ case class FpuCore(p : FpuParameter) extends Component{ val load = new Area{ val input = decode.load.stage() - def feed = io.port.load(input.source) + def feed = io.port(input.source).load val hazard = !feed.valid val output = input.haltWhen(hazard).swapPayload(WriteInput()) - io.port.load.foreach(_.ready := False) + io.port.foreach(_.load.ready := False) feed.ready := input.valid && output.ready output.source := input.source output.lockId := input.lockId @@ -281,11 +289,11 @@ case class FpuCore(p : FpuParameter) extends Component{ val store = new Area{ val input = decode.store.stage() - input.ready := io.port.rsp.ready - when(input.valid){ - io.port.rsp.valid := True - io.port.rsp.source := input.source - io.port.rsp.value := input.rs2.asBits + input.ready := io.port.map(_.rsp.ready).read(input.source) + for(i <- 0 until portCount){ + def rsp = io.port(i).rsp + rsp.valid := input.valid && input.source === i + rsp.value := input.rs2.asBits } } @@ -576,10 +584,10 @@ case class FpuCore(p : FpuParameter) extends Component{ object FpuSynthesisBench extends App{ val payloadType = HardType(Bits(8 bits)) - class Fpu(name : String, p : FpuParameter) extends Rtl{ + class Fpu(name : String, portCount : Int, p : FpuParameter) extends Rtl{ override def getName(): String = "Fpu_" + name override def getRtlPath(): String = getName() + ".v" - SpinalVerilog(new FpuCore(p){ + SpinalVerilog(new FpuCore(portCount, p){ setDefinitionName(Fpu.this.getName()) }) @@ -590,18 +598,18 @@ object FpuSynthesisBench extends App{ val rtls = ArrayBuffer[Fpu]() rtls += new Fpu( "32", + portCount = 1, FpuParameter( internalMantissaSize = 23, - withDouble = false, - sourceCount = 1 + withDouble = false ) ) rtls += new Fpu( "64", + portCount = 1, FpuParameter( internalMantissaSize = 52, - withDouble = true, - sourceCount = 1 + withDouble = true ) ) diff --git a/src/main/scala/vexriscv/ip/fpu/Interface.scala b/src/main/scala/vexriscv/ip/fpu/Interface.scala index 00e526a..3433602 100644 --- a/src/main/scala/vexriscv/ip/fpu/Interface.scala +++ b/src/main/scala/vexriscv/ip/fpu/Interface.scala @@ -32,23 +32,19 @@ object FpuFormat extends SpinalEnum{ case class FpuParameter( internalMantissaSize : Int, - withDouble : Boolean, - sourceCount : Int){ + withDouble : Boolean){ val storeLoadType = HardType(Bits(if(withDouble) 64 bits else 32 bits)) val internalExponentSize = if(withDouble) 11 else 8 val internalFloating = HardType(FpuFloat(exponentSize = internalExponentSize, mantissaSize = internalMantissaSize)) -// val opcode = HardType(UInt(2 bits)) - val source = HardType(UInt(sourceWidth bits)) + val rfAddress = HardType(UInt(5 bits)) val Opcode = FpuOpcode val Format = FpuFormat - val sourceWidth = log2Up(sourceCount) } case class FpuCmd(p : FpuParameter) extends Bundle{ - val source = UInt(p.sourceWidth bits) val opcode = p.Opcode() val value = Bits(32 bits) // Int to float val function = Bits(3 bits) // Int to float @@ -66,19 +62,17 @@ case class FpuLoad(p : FpuParameter) extends Bundle{ } case class FpuRsp(p : FpuParameter) extends Bundle{ - val source = UInt(p.sourceWidth bits) val value = p.storeLoadType() // IEEE754 store || Integer } case class FpuPort(p : FpuParameter) extends Bundle with IMasterSlave { val cmd = Stream(FpuCmd(p)) - val commit = Vec(Stream(FpuCommit(p)), p.sourceCount) - val load = Vec(Stream(FpuLoad(p)), p.sourceCount) + val commit = Stream(FpuCommit(p)) + val load = Stream(FpuLoad(p)) val rsp = Stream(FpuRsp(p)) override def asMaster(): Unit = { - master(cmd) - (commit ++ load).foreach(master(_)) + master(cmd, commit, load) slave(rsp) } } diff --git a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala index 2d66a58..17062e4 100644 --- a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala +++ b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala @@ -18,13 +18,20 @@ class DAxiCachedPlugin(config : DataCacheConfig, memoryTranslatorPortConfig : An } } +trait DBusEncodingService { + def addLoadWordEncoding(key: MaskedLiteral): Unit + def addStoreWordEncoding(key: MaskedLiteral): Unit + def encodingHalt(): Unit + def bypassStore(data : Bits) : Unit +} + class DBusCachedPlugin(val config : DataCacheConfig, memoryTranslatorPortConfig : Any = null, dBusCmdMasterPipe : Boolean = false, dBusCmdSlavePipe : Boolean = false, dBusRspSlavePipe : Boolean = false, relaxedMemoryTranslationRegister : Boolean = false, - csrInfo : Boolean = false) extends Plugin[VexRiscv] with DBusAccessService { + csrInfo : Boolean = false) extends Plugin[VexRiscv] with DBusAccessService with DBusEncodingService { import config._ assert(!(config.withExternalAmo && !dBusRspSlavePipe)) assert(isPow2(cacheSize)) @@ -43,6 +50,54 @@ class DBusCachedPlugin(val config : DataCacheConfig, dBusAccess } + override def addLoadWordEncoding(key : MaskedLiteral): Unit = { + val decoderService = pipeline.service(classOf[DecoderService]) + val cfg = pipeline.config + import cfg._ + + decoderService.add( + key, + List( + SRC1_CTRL -> Src1CtrlEnum.RS, + SRC_USE_SUB_LESS -> False, + MEMORY_ENABLE -> True, + RS1_USE -> True, + IntAluPlugin.ALU_CTRL -> IntAluPlugin.AluCtrlEnum.ADD_SUB, + SRC2_CTRL -> Src2CtrlEnum.IMI, + // REGFILE_WRITE_VALID -> True, + // BYPASSABLE_EXECUTE_STAGE -> False, + // BYPASSABLE_MEMORY_STAGE -> False, + MEMORY_WR -> False + ) ++ (if(catchSomething) List(HAS_SIDE_EFFECT -> True) else Nil) + ) + } + override def addStoreWordEncoding(key : MaskedLiteral): Unit = { + val decoderService = pipeline.service(classOf[DecoderService]) + val cfg = pipeline.config + import cfg._ + + decoderService.add( + key, + List( + SRC1_CTRL -> Src1CtrlEnum.RS, + SRC_USE_SUB_LESS -> False, + MEMORY_ENABLE -> True, + RS1_USE -> True, + IntAluPlugin.ALU_CTRL -> IntAluPlugin.AluCtrlEnum.ADD_SUB, + SRC2_CTRL -> Src2CtrlEnum.IMS, +// RS2_USE -> True, + MEMORY_WR -> True + ) ++ (if(catchSomething) List(HAS_SIDE_EFFECT -> True) else Nil) + ) + } + + var haltFromEncoding : Bool = null + override def encodingHalt(): Unit = haltFromEncoding := True + + override def bypassStore(data: Bits): Unit = { + pipeline.stages.last.input(MEMORY_STORE_DATA) := data + } + object MEMORY_ENABLE extends Stageable(Bool) object MEMORY_MANAGMENT extends Stageable(Bool) object MEMORY_WR extends Stageable(Bool) @@ -53,6 +108,7 @@ class DBusCachedPlugin(val config : DataCacheConfig, object MEMORY_FORCE_CONSTISTENCY extends Stageable(Bool) object IS_DBUS_SHARING extends Stageable(Bool()) object MEMORY_VIRTUAL_ADDRESS extends Stageable(UInt(32 bits)) + object MEMORY_STORE_DATA extends Stageable(Bits(32 bits)) override def setup(pipeline: VexRiscv): Unit = { import Riscv._ @@ -164,6 +220,8 @@ class DBusCachedPlugin(val config : DataCacheConfig, privilegeService = pipeline.service(classOf[PrivilegeService]) pipeline.update(DEBUG_BYPASS_CACHE, False) + + haltFromEncoding = False } override def build(pipeline: VexRiscv): Unit = { @@ -240,7 +298,7 @@ class DBusCachedPlugin(val config : DataCacheConfig, cache.io.cpu.execute.isValid := arbitration.isValid && input(MEMORY_ENABLE) cache.io.cpu.execute.address := input(SRC_ADD).asUInt cache.io.cpu.execute.args.wr := input(MEMORY_WR) - cache.io.cpu.execute.args.data := size.mux( + insert(MEMORY_STORE_DATA) := size.mux( U(0) -> input(RS2)( 7 downto 0) ## input(RS2)( 7 downto 0) ## input(RS2)(7 downto 0) ## input(RS2)(7 downto 0), U(1) -> input(RS2)(15 downto 0) ## input(RS2)(15 downto 0), default -> input(RS2)(31 downto 0) @@ -312,6 +370,7 @@ class DBusCachedPlugin(val config : DataCacheConfig, cache.io.cpu.writeBack.isStuck := arbitration.isStuck cache.io.cpu.writeBack.isUser := (if(privilegeService != null) privilegeService.isUser() else False) cache.io.cpu.writeBack.address := U(input(REGFILE_WRITE_DATA)) + cache.io.cpu.writeBack.storeData := input(MEMORY_STORE_DATA) val fence = if(withInvalidate) new Area { cache.io.cpu.writeBack.fence := input(INSTRUCTION)(31 downto 20).as(FenceFlags()) @@ -371,7 +430,7 @@ class DBusCachedPlugin(val config : DataCacheConfig, } } - arbitration.haltItself.setWhen(cache.io.cpu.writeBack.haltIt) + arbitration.haltItself.setWhen(cache.io.cpu.writeBack.isValid && cache.io.cpu.writeBack.haltIt) val rspShifted = Bits(32 bits) rspShifted := cache.io.cpu.writeBack.data @@ -390,6 +449,8 @@ class DBusCachedPlugin(val config : DataCacheConfig, when(arbitration.isValid && input(MEMORY_ENABLE)) { output(REGFILE_WRITE_DATA) := rspFormated } + + insert(DBUS_DATA) := cache.io.cpu.writeBack.data } //Share access to the dBus (used by self refilled MMU) @@ -405,7 +466,7 @@ class DBusCachedPlugin(val config : DataCacheConfig, dBusAccess.cmd.ready := !execute.arbitration.isStuck } cache.io.cpu.execute.args.wr := dBusAccess.cmd.write - cache.io.cpu.execute.args.data := dBusAccess.cmd.data + execute.insert(MEMORY_STORE_DATA) := dBusAccess.cmd.data cache.io.cpu.execute.args.size := dBusAccess.cmd.size if(withLrSc) cache.io.cpu.execute.args.isLrsc := False if(withAmo) cache.io.cpu.execute.args.isAmo := False @@ -435,6 +496,11 @@ class DBusCachedPlugin(val config : DataCacheConfig, } } + when(haltFromEncoding){ + cache.io.cpu.writeBack.isValid := False + managementStage.arbitration.haltItself := True + } + if(csrInfo){ val csr = service(classOf[CsrPlugin]) csr.r(0xCC0, 0 -> U(cacheSize/wayCount), 20 -> U(bytePerLine)) diff --git a/src/main/scala/vexriscv/plugin/FpuPlugin.scala b/src/main/scala/vexriscv/plugin/FpuPlugin.scala new file mode 100644 index 0000000..a438705 --- /dev/null +++ b/src/main/scala/vexriscv/plugin/FpuPlugin.scala @@ -0,0 +1,124 @@ +package vexriscv.plugin + +import spinal.core._ +import spinal.lib._ +import vexriscv._ +import vexriscv.Riscv._ +import vexriscv.ip.fpu._ + +class FpuPlugin(externalFpu : Boolean = false, + p : FpuParameter) extends Plugin[VexRiscv]{ + + object FPU_ENABLE extends Stageable(Bool()) + object FPU_COMMIT extends Stageable(Bool()) + object FPU_LOAD extends Stageable(Bool()) + object FPU_STORE extends Stageable(Bool()) + object FPU_ALU extends Stageable(Bool()) + object FPU_FORKED extends Stageable(Bool()) + object FPU_OPCODE extends Stageable(FpuOpcode()) + + var port : FpuPort = null + + override def setup(pipeline: VexRiscv): Unit = { + val decoderService = pipeline.service(classOf[DecoderService]) + decoderService.addDefault(FPU_ENABLE, False) + decoderService.add(List( + FADD_S -> List(FPU_ENABLE -> True, FPU_OPCODE -> FpuOpcode.ADD, FPU_COMMIT -> True, FPU_ALU -> True , FPU_LOAD -> False, FPU_STORE -> False), + FLW -> List(FPU_ENABLE -> True, FPU_OPCODE -> FpuOpcode.LOAD, FPU_COMMIT -> True, FPU_ALU -> False, FPU_LOAD -> True , FPU_STORE -> False), + FSW -> List(FPU_ENABLE -> True, FPU_OPCODE -> FpuOpcode.STORE,FPU_COMMIT -> False, FPU_ALU -> False, FPU_LOAD -> False, FPU_STORE -> True) + )) + + port = FpuPort(p) + if(externalFpu) master(port) + + val dBusEncoding = pipeline.service(classOf[DBusEncodingService]) + dBusEncoding.addLoadWordEncoding(FLW) + dBusEncoding.addStoreWordEncoding(FSW) + } + + override def build(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + + val internal = !externalFpu generate pipeline plug new Area{ + val fpu = FpuCore(1, p) + fpu.io.port(0).cmd << port.cmd + fpu.io.port(0).commit << port.commit + fpu.io.port(0).load << port.load + fpu.io.port(0).rsp >> port.rsp + + } + + + decode plug new Area{ + import decode._ + + //Maybe it might be better to not fork before fire to avoid RF stall on commits + val forked = Reg(Bool) setWhen(port.cmd.fire) clearWhen(!arbitration.isStuck) init(False) + + arbitration.haltItself setWhen(port.cmd.isStall) + port.cmd.valid := arbitration.isValid && input(FPU_ENABLE) && !forked + port.cmd.opcode := input(FPU_OPCODE) + port.cmd.value := output(RS1) + port.cmd.function := 0 + port.cmd.rs1 := input(INSTRUCTION)(rs1Range).asUInt + port.cmd.rs2 := input(INSTRUCTION)(rs2Range).asUInt + port.cmd.rs3 := input(INSTRUCTION)(rs3Range).asUInt + port.cmd.rd := input(INSTRUCTION)(rdRange).asUInt + port.cmd.format := FpuFormat.FLOAT + + insert(FPU_FORKED) := forked || port.cmd.fire + } + + memory plug new Area{ + import memory._ + + val isCommit = input(FPU_FORKED) && input(FPU_COMMIT) + + val commit = Stream(FpuCommit(p)) + commit.valid := isCommit && arbitration.isMoving + commit.write := arbitration.isValid + + arbitration.haltItself setWhen(isCommit && !commit.ready) //Assume commit.ready do not look at commit.valid + + port.commit <-/< commit //TODO can't commit in memory, in case a load fail + } + + writeBack plug new Area{ + import writeBack._ + + val dBusEncoding = pipeline.service(classOf[DBusEncodingService]) + val isLoad = input(FPU_FORKED) && input(FPU_LOAD) + val isStore = input(FPU_FORKED) && input(FPU_STORE) + + //Manage $store and port.rsp + port.rsp.ready := False + when(isStore){ + port.rsp.ready := True + when(arbitration.isValid) { + dBusEncoding.bypassStore(port.rsp.value) + } + when(!port.rsp.valid){ + dBusEncoding.encodingHalt() + } + } + + // Manage $load + val load = Stream(FpuLoad(p)) + load.valid := isLoad && arbitration.isMoving + load.value.assignFromBits(output(DBUS_DATA)) + + when(arbitration.isValid && !load.ready){ + dBusEncoding.encodingHalt() + } + + port.load <-/< load + } + + Component.current.afterElaboration{ + pipeline.stages.tail.foreach(_.input(FPU_FORKED).init(False)) + } + } + + +} diff --git a/src/test/cpp/raw/common/asm.mk b/src/test/cpp/raw/common/asm.mk index b63c80a..92b51ce 100644 --- a/src/test/cpp/raw/common/asm.mk +++ b/src/test/cpp/raw/common/asm.mk @@ -18,6 +18,9 @@ endif ifeq ($(COMPRESSED),yes) MARCH := $(MARCH)c endif +ifeq ($(FLOATING),yes) + MARCH := $(MARCH)fd +endif CFLAGS += -march=$(MARCH) -mabi=$(MABI) LDFLAGS += -march=$(MARCH) -mabi=$(MABI) diff --git a/src/test/cpp/raw/fpu/.gitignore b/src/test/cpp/raw/fpu/.gitignore new file mode 100644 index 0000000..c12cb2c --- /dev/null +++ b/src/test/cpp/raw/fpu/.gitignore @@ -0,0 +1,4 @@ +*.map +*.v +*.elf +*.o \ No newline at end of file diff --git a/src/test/cpp/raw/fpu/build/amo.asm b/src/test/cpp/raw/fpu/build/amo.asm new file mode 100644 index 0000000..d86b61c --- /dev/null +++ b/src/test/cpp/raw/fpu/build/amo.asm @@ -0,0 +1,247 @@ + +build/amo.elf: file format elf32-littleriscv + + +Disassembly of section .crt_section: + +80000000 <_start>: +80000000: 00100e13 li t3,1 +80000004: 00000097 auipc ra,0x0 +80000008: 27408093 addi ra,ra,628 # 80000278 +8000000c: 02d00113 li sp,45 +80000010: 0820a1af amoswap.w gp,sp,(ra) +80000014: 0000a203 lw tp,0(ra) +80000018: 02d00a13 li s4,45 +8000001c: 224a1663 bne s4,tp,80000248 +80000020: 00b00a13 li s4,11 +80000024: 223a1263 bne s4,gp,80000248 + +80000028 : +80000028: 00200e13 li t3,2 +8000002c: 00000097 auipc ra,0x0 +80000030: 25008093 addi ra,ra,592 # 8000027c +80000034: 03700113 li sp,55 +80000038: 0820a1af amoswap.w gp,sp,(ra) +8000003c: 0000a203 lw tp,0(ra) +80000040: 03700a13 li s4,55 +80000044: 204a1263 bne s4,tp,80000248 +80000048: 01600a13 li s4,22 +8000004c: 1e3a1e63 bne s4,gp,80000248 + +80000050 : +80000050: 00300e13 li t3,3 +80000054: 00000097 auipc ra,0x0 +80000058: 22c08093 addi ra,ra,556 # 80000280 +8000005c: 04200113 li sp,66 +80000060: 0020a1af amoadd.w gp,sp,(ra) +80000064: 0000a203 lw tp,0(ra) +80000068: 08b00a13 li s4,139 +8000006c: 1c4a1e63 bne s4,tp,80000248 +80000070: 04900a13 li s4,73 +80000074: 1c3a1a63 bne s4,gp,80000248 + +80000078 : +80000078: 00400e13 li t3,4 +8000007c: 00000097 auipc ra,0x0 +80000080: 20808093 addi ra,ra,520 # 80000284 +80000084: 05700113 li sp,87 +80000088: 2020a1af amoxor.w gp,sp,(ra) +8000008c: 0000a203 lw tp,0(ra) +80000090: 06d00a13 li s4,109 +80000094: 1a4a1a63 bne s4,tp,80000248 +80000098: 03a00a13 li s4,58 +8000009c: 1a3a1663 bne s4,gp,80000248 + +800000a0 : +800000a0: 00500e13 li t3,5 +800000a4: 00000097 auipc ra,0x0 +800000a8: 1e408093 addi ra,ra,484 # 80000288 +800000ac: 02c00113 li sp,44 +800000b0: 6020a1af amoand.w gp,sp,(ra) +800000b4: 0000a203 lw tp,0(ra) +800000b8: 02800a13 li s4,40 +800000bc: 184a1663 bne s4,tp,80000248 +800000c0: 03800a13 li s4,56 +800000c4: 183a1263 bne s4,gp,80000248 + +800000c8 : +800000c8: 00600e13 li t3,6 +800000cc: 00000097 auipc ra,0x0 +800000d0: 1c008093 addi ra,ra,448 # 8000028c +800000d4: 01800113 li sp,24 +800000d8: 4020a1af amoor.w gp,sp,(ra) +800000dc: 0000a203 lw tp,0(ra) +800000e0: 05b00a13 li s4,91 +800000e4: 164a1263 bne s4,tp,80000248 +800000e8: 04b00a13 li s4,75 +800000ec: 143a1e63 bne s4,gp,80000248 + +800000f0 : +800000f0: 00700e13 li t3,7 +800000f4: 00000097 auipc ra,0x0 +800000f8: 19c08093 addi ra,ra,412 # 80000290 +800000fc: 01800113 li sp,24 +80000100: 8020a1af amomin.w gp,sp,(ra) +80000104: 0000a203 lw tp,0(ra) +80000108: 01800a13 li s4,24 +8000010c: 124a1e63 bne s4,tp,80000248 +80000110: 03800a13 li s4,56 +80000114: 123a1a63 bne s4,gp,80000248 + +80000118 : +80000118: 00800e13 li t3,8 +8000011c: 00000097 auipc ra,0x0 +80000120: 17808093 addi ra,ra,376 # 80000294 +80000124: 05800113 li sp,88 +80000128: 8020a1af amomin.w gp,sp,(ra) +8000012c: 0000a203 lw tp,0(ra) +80000130: 05300a13 li s4,83 +80000134: 104a1a63 bne s4,tp,80000248 +80000138: 05300a13 li s4,83 +8000013c: 103a1663 bne s4,gp,80000248 + +80000140 : +80000140: 00900e13 li t3,9 +80000144: 00000097 auipc ra,0x0 +80000148: 15408093 addi ra,ra,340 # 80000298 +8000014c: fca00113 li sp,-54 +80000150: 8020a1af amomin.w gp,sp,(ra) +80000154: 0000a203 lw tp,0(ra) +80000158: fca00a13 li s4,-54 +8000015c: 0e4a1663 bne s4,tp,80000248 +80000160: 02100a13 li s4,33 +80000164: 0e3a1263 bne s4,gp,80000248 + +80000168 : +80000168: 00a00e13 li t3,10 +8000016c: 00000097 auipc ra,0x0 +80000170: 13008093 addi ra,ra,304 # 8000029c +80000174: 03400113 li sp,52 +80000178: 8020a1af amomin.w gp,sp,(ra) +8000017c: 0000a203 lw tp,0(ra) +80000180: fbf00a13 li s4,-65 +80000184: 0c4a1263 bne s4,tp,80000248 +80000188: fbf00a13 li s4,-65 +8000018c: 0a3a1e63 bne s4,gp,80000248 + +80000190 : +80000190: 00b00e13 li t3,11 +80000194: 00000097 auipc ra,0x0 +80000198: 10c08093 addi ra,ra,268 # 800002a0 +8000019c: fcc00113 li sp,-52 +800001a0: a020a1af amomax.w gp,sp,(ra) +800001a4: 0000a203 lw tp,0(ra) +800001a8: fcc00a13 li s4,-52 +800001ac: 084a1e63 bne s4,tp,80000248 +800001b0: fa900a13 li s4,-87 +800001b4: 083a1a63 bne s4,gp,80000248 + +800001b8 : +800001b8: 00c00e13 li t3,12 +800001bc: 00000097 auipc ra,0x0 +800001c0: 0e808093 addi ra,ra,232 # 800002a4 +800001c4: 03400113 li sp,52 +800001c8: a020a1af amomax.w gp,sp,(ra) +800001cc: 0000a203 lw tp,0(ra) +800001d0: 03400a13 li s4,52 +800001d4: 064a1a63 bne s4,tp,80000248 +800001d8: fc900a13 li s4,-55 +800001dc: 063a1663 bne s4,gp,80000248 + +800001e0 : +800001e0: 00d00e13 li t3,13 +800001e4: 00000097 auipc ra,0x0 +800001e8: 0c408093 addi ra,ra,196 # 800002a8 +800001ec: ffff0137 lui sp,0xffff0 +800001f0: c020a1af amominu.w gp,sp,(ra) +800001f4: 0000a203 lw tp,0(ra) +800001f8: ffff0a37 lui s4,0xffff0 +800001fc: 044a1663 bne s4,tp,80000248 +80000200: ffff0a37 lui s4,0xffff0 +80000204: 004a0a13 addi s4,s4,4 # ffff0004 +80000208: 043a1063 bne s4,gp,80000248 +8000020c: 0480006f j 80000254 + +80000210 : +80000210: 00e00e13 li t3,14 +80000214: 00000097 auipc ra,0x0 +80000218: 09808093 addi ra,ra,152 # 800002ac +8000021c: ffff0137 lui sp,0xffff0 +80000220: 00c10113 addi sp,sp,12 # ffff000c +80000224: e020a1af amomaxu.w gp,sp,(ra) +80000228: 0000a203 lw tp,0(ra) +8000022c: ffff0a37 lui s4,0xffff0 +80000230: 00ca0a13 addi s4,s4,12 # ffff000c +80000234: 004a1a63 bne s4,tp,80000248 +80000238: ffff0a37 lui s4,0xffff0 +8000023c: 005a0a13 addi s4,s4,5 # ffff0005 +80000240: 003a1463 bne s4,gp,80000248 +80000244: 0100006f j 80000254 + +80000248 : +80000248: f0100137 lui sp,0xf0100 +8000024c: f2410113 addi sp,sp,-220 # f00fff24 +80000250: 01c12023 sw t3,0(sp) + +80000254 : +80000254: f0100137 lui sp,0xf0100 +80000258: f2010113 addi sp,sp,-224 # f00fff20 +8000025c: 00012023 sw zero,0(sp) +80000260: 00000013 nop +80000264: 00000013 nop +80000268: 00000013 nop +8000026c: 00000013 nop +80000270: 00000013 nop +80000274: 00000013 nop + +80000278 : +80000278: 0000000b 0xb + +8000027c : +8000027c: 0016 c.slli zero,0x5 + ... + +80000280 : +80000280: 0049 c.nop 18 + ... + +80000284 : +80000284: 003a c.slli zero,0xe + ... + +80000288 : +80000288: 0038 addi a4,sp,8 + ... + +8000028c : +8000028c: 0000004b fnmsub.s ft0,ft0,ft0,ft0,rne + +80000290 : +80000290: 0038 addi a4,sp,8 + ... + +80000294 : +80000294: 00000053 fadd.s ft0,ft0,ft0,rne + +80000298 : +80000298: 0021 c.nop 8 + ... + +8000029c : +8000029c: ffffffbf 0xffffffbf + +800002a0 : +800002a0: ffa9 bnez a5,800001fa +800002a2: ffff 0xffff + +800002a4 : +800002a4: ffc9 bnez a5,8000023e +800002a6: ffff 0xffff + +800002a8 : +800002a8: 0004 0x4 +800002aa: ffff 0xffff + +800002ac : +800002ac: 0005 c.nop 1 +800002ae: ffff 0xffff diff --git a/src/test/cpp/raw/fpu/build/amo.hex b/src/test/cpp/raw/fpu/build/amo.hex new file mode 100644 index 0000000..74d3567 --- /dev/null +++ b/src/test/cpp/raw/fpu/build/amo.hex @@ -0,0 +1,45 @@ +:0200000480007A +:10000000130E100097000000938040271301D002C8 +:10001000AFA1200803A20000130AD00263164A22EF +:10002000130AB00063123A22130E2000970000005A +:100030009380002513017003AFA1200803A20000E4 +:10004000130A700363124A20130A6001631E3A1EEA +:10005000130E3000970000009380C022130120048B +:10006000AFA1200003A20000130AB008631E4A1CBF +:10007000130A9004631A3A1C130E40009700000004 +:100080009380802013017005AFA1202003A20000FF +:10009000130AD006631A4A1A130AA00363163A1AFF +:1000A000130E5000970000009380401E1301C00201 +:1000B000AFA1206003A20000130A800263164A1851 +:1000C000130A800363123A18130E600097000000B1 +:1000D0009380001C13018001AFA1204003A2000007 +:1000E000130AB00563124A16130AB004631E3A14C9 +:1000F000130E7000970000009380C0191301800157 +:10010000AFA1208003A20000130A8001631E4A12DF +:10011000130A8003631A3A12130E8000970000003E +:100120009380801713018005AFA1208003A20000F7 +:10013000130A3005631A4A10130A300563163A1081 +:10014000130E900097000000938040151301A0FC4F +:10015000AFA1208003A20000130AA0FC63164A0E80 +:10016000130A100263123A0E130EA000970000004B +:100170009380001313014003AFA1208003A200006D +:10018000130AF0FB63124A0C130AF0FB631E3A0ACF +:10019000130EB000970000009380C0101301C0FC44 +:1001A000AFA120A003A20000130AC0FC631E4A08EE +:1001B000130A90FA631A3A08130EC0009700000061 +:1001C0009380800E13014003AFA120A003A2000082 +:1001D000130A4003631A4A06130A90FC63163A0690 +:1001E000130ED000970000009380400C3701FFFFF2 +:1001F000AFA120C003A20000370AFFFF63164A0424 +:10020000370AFFFF130A4A0063103A046F008004A4 +:10021000130EE00097000000938080093701FFFF74 +:100220001301C100AFA120E003A20000370AFFFFC5 +:10023000130ACA00631A4A00370AFFFF130A5A005A +:1002400063143A006F000001370110F0130141F20E +:100250002320C101370110F0130101F22320010016 +:100260001300000013000000130000001300000042 +:1002700013000000130000000B0000001600000037 +:10028000490000003A000000380000004B00000068 +:10029000380000005300000021000000BFFFFFFFF6 +:1002A000A9FFFFFFC9FFFFFF0400FFFF0500FFFFDD +:00000001FF diff --git a/src/test/cpp/raw/fpu/build/fpu.asm b/src/test/cpp/raw/fpu/build/fpu.asm new file mode 100644 index 0000000..4c0a26f --- /dev/null +++ b/src/test/cpp/raw/fpu/build/fpu.asm @@ -0,0 +1,224 @@ + +build/fpu.elf: file format elf32-littleriscv + + +Disassembly of section .crt_section: + +80000000 <_start>: +80000000: 00100e13 li t3,1 +80000004: 00000013 nop +80000008: 00000013 nop +8000000c: 00000013 nop +80000010: 00000013 nop +80000014: 00107153 fadd.s ft2,ft0,ft1 +80000018: 00000013 nop +8000001c: 00000013 nop +80000020: 00000013 nop +80000024: 00000013 nop +80000028: 0180006f j 80000040 +8000002c: 00000013 nop +80000030: 00000013 nop +80000034: 00000013 nop +80000038: 00000013 nop +8000003c: 00000013 nop + +80000040 : +80000040: 00200e13 li t3,2 +80000044: 00000097 auipc ra,0x0 +80000048: 1fc0a083 lw ra,508(ra) # 80000240 +8000004c: 00107153 fadd.s ft2,ft0,ft1 +80000050: 00000013 nop +80000054: 00000013 nop +80000058: 00000013 nop +8000005c: 00000013 nop +80000060: 0200006f j 80000080 +80000064: 00000013 nop +80000068: 00000013 nop +8000006c: 00000013 nop +80000070: 00000013 nop +80000074: 00000013 nop +80000078: 00000013 nop +8000007c: 00000013 nop + +80000080 : +80000080: 00300e13 li t3,3 +80000084: 00000013 nop +80000088: 00000013 nop +8000008c: 00000013 nop +80000090: 00000013 nop +80000094: 0080006f j 8000009c +80000098: 00107153 fadd.s ft2,ft0,ft1 + +8000009c : +8000009c: 0240006f j 800000c0 +800000a0: 00000013 nop +800000a4: 00000013 nop +800000a8: 00000013 nop +800000ac: 00000013 nop +800000b0: 00000013 nop +800000b4: 00000013 nop +800000b8: 00000013 nop +800000bc: 00000013 nop + +800000c0 : +800000c0: 00400e13 li t3,4 +800000c4: 00000013 nop +800000c8: 00000013 nop +800000cc: 00000013 nop +800000d0: 00000013 nop +800000d4: 00000097 auipc ra,0x0 +800000d8: 16c08093 addi ra,ra,364 # 80000240 +800000dc: 0000a107 flw ft2,0(ra) +800000e0: 00000013 nop +800000e4: 00000013 nop +800000e8: 00000013 nop +800000ec: 00000013 nop +800000f0: 0100006f j 80000100 +800000f4: 00000013 nop +800000f8: 00000013 nop +800000fc: 00000013 nop + +80000100 : +80000100: 00500e13 li t3,5 +80000104: 00000013 nop +80000108: 00000013 nop +8000010c: 00000013 nop +80000110: 00000013 nop +80000114: 00000097 auipc ra,0x0 +80000118: 12c08093 addi ra,ra,300 # 80000240 +8000011c: 00000117 auipc sp,0x0 +80000120: 12810113 addi sp,sp,296 # 80000244 +80000124: 0000a087 flw ft1,0(ra) +80000128: 00012107 flw ft2,0(sp) +8000012c: 0020f1d3 fadd.s ft3,ft1,ft2 +80000130: 00000013 nop +80000134: 00000013 nop +80000138: 00000013 nop +8000013c: 00000013 nop +80000140: 0400006f j 80000180 +80000144: 00000013 nop +80000148: 00000013 nop +8000014c: 00000013 nop +80000150: 00000013 nop +80000154: 00000013 nop +80000158: 00000013 nop +8000015c: 00000013 nop +80000160: 00000013 nop +80000164: 00000013 nop +80000168: 00000013 nop +8000016c: 00000013 nop +80000170: 00000013 nop +80000174: 00000013 nop +80000178: 00000013 nop +8000017c: 00000013 nop + +80000180 : +80000180: 00600e13 li t3,6 +80000184: 00000013 nop +80000188: 00000013 nop +8000018c: 00000013 nop +80000190: 00000013 nop +80000194: 00000097 auipc ra,0x0 +80000198: 0b408093 addi ra,ra,180 # 80000248 +8000019c: 0030a027 fsw ft3,0(ra) +800001a0: 00000013 nop +800001a4: 00000013 nop +800001a8: 00000013 nop +800001ac: 00000013 nop +800001b0: 0100006f j 800001c0 +800001b4: 00000013 nop +800001b8: 00000013 nop +800001bc: 00000013 nop + +800001c0 : +800001c0: 00700e13 li t3,7 +800001c4: 00000097 auipc ra,0x0 +800001c8: 08408093 addi ra,ra,132 # 80000248 +800001cc: 00000117 auipc sp,0x0 +800001d0: 08010113 addi sp,sp,128 # 8000024c +800001d4: 00000197 auipc gp,0x0 +800001d8: 07c18193 addi gp,gp,124 # 80000250 +800001dc: 00000217 auipc tp,0x0 +800001e0: 07820213 addi tp,tp,120 # 80000254 +800001e4: 0000a207 flw ft4,0(ra) +800001e8: 00427253 fadd.s ft4,ft4,ft4 +800001ec: 0040f2d3 fadd.s ft5,ft1,ft4 +800001f0: 00412027 fsw ft4,0(sp) +800001f4: 0051a027 fsw ft5,0(gp) +800001f8: 00122027 fsw ft1,0(tp) # 0 <_start-0x80000000> +800001fc: 00000013 nop +80000200: 00000013 nop +80000204: 00000013 nop +80000208: 00000013 nop +8000020c: 0100006f j 8000021c + +80000210 : +80000210: f0100137 lui sp,0xf0100 +80000214: f2410113 addi sp,sp,-220 # f00fff24 +80000218: 01c12023 sw t3,0(sp) + +8000021c : +8000021c: f0100137 lui sp,0xf0100 +80000220: f2010113 addi sp,sp,-224 # f00fff20 +80000224: 00012023 sw zero,0(sp) +80000228: 00000013 nop +8000022c: 00000013 nop +80000230: 00000013 nop +80000234: 00000013 nop +80000238: 00000013 nop +8000023c: 00000013 nop + +80000240 : +80000240: 0000 unimp +80000242: 3fc0 fld fs0,184(a5) + +80000244 : +80000244: 0000 unimp +80000246: 40a0 lw s0,64(s1) + +80000248 : +80000248: 0049 c.nop 18 + ... + +8000024c : +8000024c: 003a c.slli zero,0xe + ... + +80000250 : +80000250: 0038 addi a4,sp,8 + ... + +80000254 : +80000254: 0000004b fnmsub.s ft0,ft0,ft0,ft0,rne + +80000258 : +80000258: 0038 addi a4,sp,8 + ... + +8000025c : +8000025c: 00000053 fadd.s ft0,ft0,ft0,rne + +80000260 : +80000260: 0021 c.nop 8 + ... + +80000264 : +80000264: ffffffbf 0xffffffbf + +80000268 : +80000268: ffa9 bnez a5,800001c2 +8000026a: ffff 0xffff + +8000026c : +8000026c: ffc9 bnez a5,80000206 +8000026e: ffff 0xffff + +80000270 : +80000270: 0004 0x4 +80000272: ffff 0xffff + +80000274 : +80000274: 0005 c.nop 1 +80000276: ffff 0xffff +80000278: 0000 unimp + ... diff --git a/src/test/cpp/raw/fpu/build/fpu.hex b/src/test/cpp/raw/fpu/build/fpu.hex new file mode 100644 index 0000000..58fefe0 --- /dev/null +++ b/src/test/cpp/raw/fpu/build/fpu.hex @@ -0,0 +1,42 @@ +:0200000480007A +:10000000130E100013000000130000001300000086 +:1000100013000000537110001300000013000000D3 +:1000200013000000130000006F00800113000000A7 +:100030001300000013000000130000001300000074 +:10004000130E20009700000083A0C01F5371100002 +:100050001300000013000000130000001300000054 +:100060006F000002130000001300000013000000E6 +:100070001300000013000000130000001300000034 +:10008000130E3000130000001300000013000000E6 +:10009000130000006F008000537110006F004002D9 +:1000A0001300000013000000130000001300000004 +:1000B00013000000130000001300000013000000F4 +:1000C000130E400013000000130000001300000096 +:1000D00013000000970000009380C01607A10000E5 +:1000E00013000000130000001300000013000000C4 +:1000F0006F00000113000000130000001300000057 +:10010000130E500013000000130000001300000045 +:1001100013000000970000009380C0121701000038 +:100120001301811287A0000007210100D3F12000F4 +:100130001300000013000000130000001300000073 +:100140006F00000413000000130000001300000003 +:100150001300000013000000130000001300000053 +:100160001300000013000000130000001300000043 +:100170001300000013000000130000001300000033 +:10018000130E6000130000001300000013000000B5 +:1001900013000000970000009380400B27A0300060 +:1001A0001300000013000000130000001300000003 +:1001B0006F00000113000000130000001300000096 +:1001C000130E700097000000938040081701000094 +:1001D00013010108970100009381C1071702000075 +:1001E0001302820707A2000053724200D3F24000BC +:1001F0002720410027A051002720120013000000F3 +:100200001300000013000000130000006F00000145 +:10021000370110F0130141F22320C101370110F022 +:10022000130101F22320010013000000130000005D +:100230001300000013000000130000001300000072 +:100240000000C03F0000A040490000003A0000004C +:10025000380000004B000000380000005300000090 +:1002600021000000BFFFFFFFA9FFFFFFC9FFFFFF45 +:0C0270000400FFFF0500FFFF000000007D +:00000001FF diff --git a/src/test/cpp/raw/fpu/makefile b/src/test/cpp/raw/fpu/makefile new file mode 100644 index 0000000..4892095 --- /dev/null +++ b/src/test/cpp/raw/fpu/makefile @@ -0,0 +1,5 @@ +PROJ_NAME=fpu + +FLOATING=yes + +include ../common/asm.mk \ No newline at end of file diff --git a/src/test/cpp/raw/fpu/src/crt.S b/src/test/cpp/raw/fpu/src/crt.S new file mode 100644 index 0000000..e0ed765 --- /dev/null +++ b/src/test/cpp/raw/fpu/src/crt.S @@ -0,0 +1,161 @@ +.globl _star +#define TEST_ID x28 + +_start: + +#define assert(reg, value) \ + li x20, value; \ + bne x20, reg, fail; + + +test1: + li TEST_ID, 1 + nop + nop + nop + nop + fadd.s f2, f0, f1 + nop + nop + nop + nop + j test2 + +.align 6 +test2: + li TEST_ID, 2 + lw x1, test1_data + fadd.s f2, f0, f1 + nop + nop + nop + nop + + j test3 + + +.align 6 +test3: + li TEST_ID, 3 + nop + nop + nop + nop + j skip + fadd.s f2, f0, f1 +skip: + j test4 + + + +.align 6 +test4: + li TEST_ID, 4 + nop + nop + nop + nop + la x1, test1_data + flw f2, 0(x1) + nop + nop + nop + nop + + j test5 + +.align 6 +test5: + li TEST_ID, 5 + nop + nop + nop + nop + la x1, test1_data + la x2, test2_data + flw f1, 0(x1) + flw f2, 0(x2) + fadd.s f3, f1, f2 + nop + nop + nop + nop + j test6 + +.align 6 +test6: + li TEST_ID, 6 + nop + nop + nop + nop + la x1, test3_data + fsw f3, 0(x1) + nop + nop + nop + nop + j test7 + +.align 6 +test7: + li TEST_ID, 7 + + la x1, test3_data + la x2, test4_data + la x3, test5_data + la x4, test6_data + flw f4, 0(x1) + fadd.s f4, f4, f4 + fadd.s f5, f1, f4 + fsw f4, 0(x2) + fsw f5, 0(x3) + fsw f1, 0(x4) + nop + nop + nop + nop + + /* la x1, test1_data + li x2, 45 + amoswap.w x3,x2,(x1) + lw x4, 0(x1) + assert(x4, 45) + assert(x3, 11)*/ + + + + + j pass + + +fail: + li x2, 0xF00FFF24 + sw TEST_ID, 0(x2) + +pass: + li x2, 0xF00FFF20 + sw x0, 0(x2) + + nop + nop + nop + nop + nop + nop + + +test1_data: .word 0x3fc00000 //1.5f +test2_data: .word 0x40a00000 //5.0f +test3_data: .word 73 +test4_data: .word 58 +test5_data: .word 56 +test6_data: .word 75 +test7_data: .word 56 +test8_data: .word 83 +test9_data: .word 33 +test10_data: .word -65 +test11_data: .word -87 +test12_data: .word -55 +test13_data: .word 0xFFFF0004 +test14_data: .word 0xFFFF0005 \ No newline at end of file diff --git a/src/test/cpp/raw/fpu/src/ld b/src/test/cpp/raw/fpu/src/ld new file mode 100644 index 0000000..93d8de8 --- /dev/null +++ b/src/test/cpp/raw/fpu/src/ld @@ -0,0 +1,16 @@ +OUTPUT_ARCH( "riscv" ) + +MEMORY { + onChipRam (W!RX)/*(RX)*/ : ORIGIN = 0x80000000, LENGTH = 128K +} + +SECTIONS +{ + + .crt_section : + { + . = ALIGN(4); + *crt.o(.text) + } > onChipRam + +} diff --git a/src/test/cpp/regression/main.cpp b/src/test/cpp/regression/main.cpp index bc960fb..f24ec74 100644 --- a/src/test/cpp/regression/main.cpp +++ b/src/test/cpp/regression/main.cpp @@ -1309,7 +1309,9 @@ public: } Workspace* withRiscvRef(){ + #ifdef WITH_RISCV_REF riscvRefEnable = true; + #endif return this; } @@ -3862,8 +3864,6 @@ int main(int argc, char **argv, char **env) { timespec startedAt = timer_start(); - - //#ifdef LITEX // LitexSoC("linux") // .withRiscvRef() diff --git a/src/test/cpp/regression/makefile b/src/test/cpp/regression/makefile index c7dcf5f..adc7d98 100644 --- a/src/test/cpp/regression/makefile +++ b/src/test/cpp/regression/makefile @@ -27,6 +27,7 @@ NO_STALL?=no DEBUG_PLUGIN?=STD DEBUG_PLUGIN_EXTERNAL?=no RUN_HEX=no +WITH_RISCV_REF=yes CUSTOM_SIMD_ADD?=no CUSTOM_CSR?=no DHRYSTONE=yes @@ -128,6 +129,10 @@ ifeq ($(COREMARK),yes) ADDCFLAGS += -CFLAGS -DCOREMARK endif +ifeq ($(WITH_RISCV_REF),yes) + ADDCFLAGS += -CFLAGS -DWITH_RISCV_REF +endif + ifneq ($(shell grep timerInterrupt ${VEXRISCV_FILE} -w),) diff --git a/src/test/scala/vexriscv/DhrystoneBench.scala b/src/test/scala/vexriscv/DhrystoneBench.scala index 31c8c14..a99b5d8 100644 --- a/src/test/scala/vexriscv/DhrystoneBench.scala +++ b/src/test/scala/vexriscv/DhrystoneBench.scala @@ -50,100 +50,100 @@ class DhrystoneBench extends FunSuite { } - for(withMemoryStage <- List(false, true)){ - val stages = if(withMemoryStage) "Three" else "Two" - getDmips( - name = s"Gen${stages}StageArty", - gen = SpinalVerilog(GenTwoThreeStage.cpu( - withMulDiv = false, - bypass = false, - barrielShifter = false, - withMemoryStage = withMemoryStage - )), - testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes" - ) - getDmips( - name = s"Gen${stages}StageBarrielArty", - gen = SpinalVerilog(GenTwoThreeStage.cpu( - withMulDiv = false, - bypass = true, - barrielShifter = true, - withMemoryStage = withMemoryStage - )), - testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes" - ) - getDmips( - name = s"Gen${stages}StageMDArty", - gen = SpinalVerilog(GenTwoThreeStage.cpu( - withMulDiv = true, - bypass = false, - barrielShifter = false, - withMemoryStage = withMemoryStage - )), - testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=yes DIV=yes COREMARK=yes" - ) - getDmips( - name = s"Gen${stages}StageMDBarrielArty", - gen = SpinalVerilog(GenTwoThreeStage.cpu( - withMulDiv = true, - bypass = true, - barrielShifter = true, - withMemoryStage = withMemoryStage - )), - testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=yes DIV=yes COREMARK=yes" - ) - } - - getDmips( - name = "GenSmallestNoCsr", - gen = GenSmallestNoCsr.main(null), - testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes" - ) - - - getDmips( - name = "GenSmallest", - gen = GenSmallest.main(null), - testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes" - ) - - - getDmips( - name = "GenSmallAndProductive", - gen = GenSmallAndProductive.main(null), - testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes" - ) - - getDmips( - name = "GenSmallAndProductiveWithICache", - gen = GenSmallAndProductiveICache.main(null), - testCmd = "make clean run REDO=10 IBUS=CACHED DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes" - ) - - - getDmips( - name = "GenFullNoMmuNoCache", - gen = GenFullNoMmuNoCache.main(null), - testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no COREMARK=yes" - ) - - getDmips( - name = "GenNoCacheNoMmuMaxPerf", - gen = GenNoCacheNoMmuMaxPerf.main(null), - testCmd = "make clean run REDO=10 MMU=no CSR=no DBUS=SIMPLE IBUS=SIMPLE COREMARK=yes" - ) - - - getDmips( - name = "GenFullNoMmuMaxPerf", - gen = GenFullNoMmuMaxPerf.main(null), - testCmd = "make clean run REDO=10 MMU=no CSR=no COREMARK=yes" - ) - getDmips( - name = "GenFullNoMmu", - gen = GenFullNoMmu.main(null), - testCmd = "make clean run REDO=10 MMU=no CSR=no COREMARK=yes" - ) +// for(withMemoryStage <- List(false, true)){ +// val stages = if(withMemoryStage) "Three" else "Two" +// getDmips( +// name = s"Gen${stages}StageArty", +// gen = SpinalVerilog(GenTwoThreeStage.cpu( +// withMulDiv = false, +// bypass = false, +// barrielShifter = false, +// withMemoryStage = withMemoryStage +// )), +// testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes" +// ) +// getDmips( +// name = s"Gen${stages}StageBarrielArty", +// gen = SpinalVerilog(GenTwoThreeStage.cpu( +// withMulDiv = false, +// bypass = true, +// barrielShifter = true, +// withMemoryStage = withMemoryStage +// )), +// testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes" +// ) +// getDmips( +// name = s"Gen${stages}StageMDArty", +// gen = SpinalVerilog(GenTwoThreeStage.cpu( +// withMulDiv = true, +// bypass = false, +// barrielShifter = false, +// withMemoryStage = withMemoryStage +// )), +// testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=yes DIV=yes COREMARK=yes" +// ) +// getDmips( +// name = s"Gen${stages}StageMDBarrielArty", +// gen = SpinalVerilog(GenTwoThreeStage.cpu( +// withMulDiv = true, +// bypass = true, +// barrielShifter = true, +// withMemoryStage = withMemoryStage +// )), +// testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=yes DIV=yes COREMARK=yes" +// ) +// } +// +// getDmips( +// name = "GenSmallestNoCsr", +// gen = GenSmallestNoCsr.main(null), +// testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes" +// ) +// +// +// getDmips( +// name = "GenSmallest", +// gen = GenSmallest.main(null), +// testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes" +// ) +// +// +// getDmips( +// name = "GenSmallAndProductive", +// gen = GenSmallAndProductive.main(null), +// testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes" +// ) +// +// getDmips( +// name = "GenSmallAndProductiveWithICache", +// gen = GenSmallAndProductiveICache.main(null), +// testCmd = "make clean run REDO=10 IBUS=CACHED DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes" +// ) +// +// +// getDmips( +// name = "GenFullNoMmuNoCache", +// gen = GenFullNoMmuNoCache.main(null), +// testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no COREMARK=yes" +// ) +// +// getDmips( +// name = "GenNoCacheNoMmuMaxPerf", +// gen = GenNoCacheNoMmuMaxPerf.main(null), +// testCmd = "make clean run REDO=10 MMU=no CSR=no DBUS=SIMPLE IBUS=SIMPLE COREMARK=yes" +// ) +// +// +// getDmips( +// name = "GenFullNoMmuMaxPerf", +// gen = GenFullNoMmuMaxPerf.main(null), +// testCmd = "make clean run REDO=10 MMU=no CSR=no COREMARK=yes" +// ) +// getDmips( +// name = "GenFullNoMmu", +// gen = GenFullNoMmu.main(null), +// testCmd = "make clean run REDO=10 MMU=no CSR=no COREMARK=yes" +// ) getDmips( name = "GenFull", @@ -151,12 +151,11 @@ class DhrystoneBench extends FunSuite { testCmd = "make clean run REDO=10 CSR=no MMU=no COREMARK=yes" ) - getDmips( - name = "GenLinuxBalenced", - gen = LinuxGen.main(Array.fill[String](0)("")), - testCmd = "make clean run IBUS=CACHED DBUS=CACHED DEBUG_PLUGIN=STD DHRYSTONE=yes SUPERVISOR=yes MMU=no CSR=yes CSR_SKIP_TEST=yes COMPRESSED=no MUL=yes DIV=yes LRSC=yes AMO=yes REDO=10 TRACE=no COREMARK=yes LINUX_REGRESSION=no" - ) -// //make run IBUS=CACHED DBUS=CACHED DEBUG_PLUGIN=STD DHRYSTONE=yess SUPERVISOR=yes CSR=yes COMPRESSED=no MUL=yes DIV=yes LRSC=yes AMO=yes REDO=1 TRACE=no LINUX_REGRESSION=yes SEED=42 +// getDmips( +// name = "GenLinuxBalenced", +// gen = LinuxGen.main(Array.fill[String](0)("")), +// testCmd = "make clean run IBUS=CACHED DBUS=CACHED DEBUG_PLUGIN=STD DHRYSTONE=yes SUPERVISOR=yes MMU=no CSR=yes CSR_SKIP_TEST=yes COMPRESSED=no MUL=yes DIV=yes LRSC=yes AMO=yes REDO=10 TRACE=no COREMARK=yes LINUX_REGRESSION=no" +// ) test("final_report") { diff --git a/src/test/scala/vexriscv/ip/fpu/FpuTest.scala b/src/test/scala/vexriscv/ip/fpu/FpuTest.scala index 3190a0c..c2cd7d7 100644 --- a/src/test/scala/vexriscv/ip/fpu/FpuTest.scala +++ b/src/test/scala/vexriscv/ip/fpu/FpuTest.scala @@ -16,32 +16,47 @@ class FpuTest extends FunSuite{ test("directed"){ + val portCount = 1 val p = FpuParameter( internalMantissaSize = 23, - withDouble = false, - sourceCount = 1 + withDouble = false ) - SimConfig.withFstWave.compile(new FpuCore(p)).doSim(seed = 42){ dut => + SimConfig.withFstWave.compile(new FpuCore(1, p)).doSim(seed = 42){ dut => dut.clockDomain.forkStimulus(10) - val cpus = for(id <- 0 until 1 << p.sourceWidth) yield new { + val cpus = for(id <- 0 until portCount) yield new { val cmdQueue = mutable.Queue[FpuCmd => Unit]() val commitQueue = mutable.Queue[FpuCommit => Unit]() val loadQueue = mutable.Queue[FpuLoad => Unit]() val rspQueue = mutable.Queue[FpuRsp => Unit]() - StreamDriver(dut.io.port.commit(id) ,dut.clockDomain){payload => + StreamDriver(dut.io.port(id).cmd ,dut.clockDomain){payload => + if(cmdQueue.isEmpty) false else { + cmdQueue.dequeue().apply(payload) + true + } + } + + + StreamMonitor(dut.io.port(id)rsp, dut.clockDomain){payload => + rspQueue.dequeue().apply(payload) + } + + StreamReadyRandomizer(dut.io.port(id).rsp, dut.clockDomain) + + + StreamDriver(dut.io.port(id).commit ,dut.clockDomain){payload => if(commitQueue.isEmpty) false else { commitQueue.dequeue().apply(payload) true } } - StreamDriver(dut.io.port.load(id) ,dut.clockDomain){payload => + StreamDriver(dut.io.port(id).load ,dut.clockDomain){payload => if(loadQueue.isEmpty) false else { loadQueue.dequeue().apply(payload) true @@ -50,7 +65,6 @@ class FpuTest extends FunSuite{ def loadRaw(rd : Int, value : BigInt): Unit ={ cmdQueue += {cmd => - cmd.source #= id cmd.opcode #= cmd.opcode.spinalEnum.LOAD cmd.value.randomize() cmd.rs1.randomize() @@ -72,7 +86,6 @@ class FpuTest extends FunSuite{ def storeRaw(rs : Int)(body : FpuRsp => Unit): Unit ={ cmdQueue += {cmd => - cmd.source #= id cmd.opcode #= cmd.opcode.spinalEnum.STORE cmd.value.randomize() cmd.rs1.randomize() @@ -90,7 +103,6 @@ class FpuTest extends FunSuite{ def mul(rd : Int, rs1 : Int, rs2 : Int): Unit ={ cmdQueue += {cmd => - cmd.source #= id cmd.opcode #= cmd.opcode.spinalEnum.MUL cmd.value.randomize() cmd.rs1 #= rs1 @@ -105,7 +117,6 @@ class FpuTest extends FunSuite{ def add(rd : Int, rs1 : Int, rs2 : Int): Unit ={ cmdQueue += {cmd => - cmd.source #= id cmd.opcode #= cmd.opcode.spinalEnum.ADD cmd.value.randomize() cmd.rs1 #= rs1 @@ -120,7 +131,6 @@ class FpuTest extends FunSuite{ def div(rd : Int, rs1 : Int, rs2 : Int): Unit ={ cmdQueue += {cmd => - cmd.source #= id cmd.opcode #= cmd.opcode.spinalEnum.DIV cmd.value.randomize() cmd.rs1 #= rs1 @@ -135,7 +145,6 @@ class FpuTest extends FunSuite{ def sqrt(rd : Int, rs1 : Int): Unit ={ cmdQueue += {cmd => - cmd.source #= id cmd.opcode #= cmd.opcode.spinalEnum.SQRT cmd.value.randomize() cmd.rs1 #= rs1 @@ -150,7 +159,6 @@ class FpuTest extends FunSuite{ def fma(rd : Int, rs1 : Int, rs2 : Int, rs3 : Int): Unit ={ cmdQueue += {cmd => - cmd.source #= id cmd.opcode #= cmd.opcode.spinalEnum.FMA cmd.value.randomize() cmd.rs1 #= rs1 @@ -164,25 +172,6 @@ class FpuTest extends FunSuite{ } } - StreamDriver(dut.io.port.cmd ,dut.clockDomain){payload => - cpus.map(_.cmdQueue).filter(_.nonEmpty).toSeq match { - case Nil => false - case l => { - l.randomPick().dequeue().apply(payload) - true - } - } - } - - - - - StreamMonitor(dut.io.port.rsp, dut.clockDomain){payload => - cpus(payload.source.toInt).rspQueue.dequeue().apply(payload) - } - - StreamReadyRandomizer(dut.io.port.rsp, dut.clockDomain) -