From f180ba2fc96a0899c3627840f60bdb169b19ee40 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Tue, 16 Feb 2021 15:38:51 +0100 Subject: [PATCH] fpu double fixes DataCache now support wide load/store --- src/main/scala/vexriscv/TestsWorkspace.scala | 258 ++++++------------ src/main/scala/vexriscv/VexRiscv.scala | 1 - src/main/scala/vexriscv/ip/DataCache.scala | 111 +++++--- src/main/scala/vexriscv/ip/fpu/FpuCore.scala | 37 +-- .../vexriscv/plugin/DBusCachedPlugin.scala | 52 ++-- .../scala/vexriscv/plugin/FpuPlugin.scala | 9 +- src/test/cpp/regression/main.cpp | 237 +++++++--------- src/test/cpp/regression/makefile | 9 +- src/test/scala/vexriscv/DhrystoneBench.scala | 198 +++++++------- .../vexriscv/TestIndividualFeatures.scala | 9 +- src/test/scala/vexriscv/ip/fpu/FpuTest.scala | 34 ++- 11 files changed, 434 insertions(+), 521 deletions(-) diff --git a/src/main/scala/vexriscv/TestsWorkspace.scala b/src/main/scala/vexriscv/TestsWorkspace.scala index 96f2551..0154036 100644 --- a/src/main/scala/vexriscv/TestsWorkspace.scala +++ b/src/main/scala/vexriscv/TestsWorkspace.scala @@ -32,189 +32,83 @@ import vexriscv.ip.fpu.FpuParameter //make clean all SEED=42 MMU=no STOP_ON_ERROR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes SUPERVISOR=yes REDO=1 DHRYSTONE=yes LRSC=yes AMO=yes TRACE=yes TRACE_START=1000000000 FLOW_INFO=ye IBUS_DATA_WIDTH=128 DBUS_DATA_WIDTH=128 LINUX_SOC_SMP=yes VMLINUX=../../../../../buildroot/output/images/Image RAMDISK=../../../../../buildroot/output/images/rootfs.cpio DTB=../../../../../buildroot/output/images/dtb EMULATOR=../../../../../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_jump.bin object TestsWorkspace { def main(args: Array[String]) { -// def configFull = { -// val config = VexRiscvConfig( -// plugins = List( -// new MmuPlugin( -// ioRange = x => x(31 downto 28) === 0xF -// ), -// //Uncomment the whole IBusSimplePlugin and comment IBusCachedPlugin if you want uncached iBus config -// // new IBusSimplePlugin( -// // resetVector = 
0x80000000l, -// // cmdForkOnSecondStage = false, -// // cmdForkPersistence = false, -// // prediction = DYNAMIC_TARGET, -// // historyRamSizeLog2 = 10, -// // catchAccessFault = true, -// // compressedGen = true, -// // busLatencyMin = 1, -// // injectorStage = true, -// // memoryTranslatorPortConfig = withMmu generate MmuPortConfig( -// // portTlbSize = 4 -// // ) -// // ), -// -// //Uncomment the whole IBusCachedPlugin and comment IBusSimplePlugin if you want cached iBus config -// new IBusCachedPlugin( -// resetVector = 0x80000000l, -// compressedGen = false, -// prediction = STATIC, -// injectorStage = false, -// config = InstructionCacheConfig( -// cacheSize = 4096*2, -// bytePerLine = 64, -// wayCount = 2, -// addressWidth = 32, -// cpuDataWidth = 32, -// memDataWidth = 128, -// catchIllegalAccess = true, -// catchAccessFault = true, -// asyncTagMemory = false, -// twoCycleRam = true, -// twoCycleCache = true, -// reducedBankWidth = true -// // ) -// ), -// memoryTranslatorPortConfig = MmuPortConfig( -// portTlbSize = 4, -// latency = 1, -// earlyRequireMmuLockup = true, -// earlyCacheHits = true -// ) -// ), -// // ).newTightlyCoupledPort(TightlyCoupledPortParameter("iBusTc", a => a(30 downto 28) === 0x0 && a(5))), -// // new DBusSimplePlugin( -// // catchAddressMisaligned = true, -// // catchAccessFault = true, -// // earlyInjection = false, -// // withLrSc = true, -// // memoryTranslatorPortConfig = withMmu generate MmuPortConfig( -// // portTlbSize = 4 -// // ) -// // ), -// new DBusCachedPlugin( -// dBusCmdMasterPipe = true, -// dBusCmdSlavePipe = true, -// dBusRspSlavePipe = true, -// config = new DataCacheConfig( -// cacheSize = 4096*1, -// bytePerLine = 64, -// wayCount = 1, -// addressWidth = 32, -// cpuDataWidth = 32, -// memDataWidth = 128, -// catchAccessError = true, -// catchIllegal = true, -// catchUnaligned = true, -// withLrSc = true, -// withAmo = true, -// withExclusive = true, -// withInvalidate = true, -// pendingMax = 32 -// // ) -// ), 
-// memoryTranslatorPortConfig = MmuPortConfig( -// portTlbSize = 4, -// latency = 1, -// earlyRequireMmuLockup = true, -// earlyCacheHits = true -// ) -// ), -// -// // new MemoryTranslatorPlugin( -// // tlbSize = 32, -// // virtualRange = _(31 downto 28) === 0xC, -// // ioRange = _(31 downto 28) === 0xF -// // ), -// -// new DecoderSimplePlugin( -// catchIllegalInstruction = true -// ), -// new RegFilePlugin( -// regFileReadyKind = plugin.ASYNC, -// zeroBoot = true -// ), -// new IntAluPlugin, -// new SrcPlugin( -// separatedAddSub = false -// ), -// new FullBarrelShifterPlugin(earlyInjection = false), -// // new LightShifterPlugin, -// new HazardSimplePlugin( -// bypassExecute = true, -// bypassMemory = true, -// bypassWriteBack = true, -// bypassWriteBackBuffer = true, -// pessimisticUseSrc = false, -// pessimisticWriteRegFile = false, -// pessimisticAddressMatch = false -// ), -// // new HazardSimplePlugin(false, true, false, true), -// // new HazardSimplePlugin(false, false, false, false), -// new MulPlugin, -// new MulDivIterativePlugin( -// genMul = false, -// genDiv = true, -// mulUnrollFactor = 32, -// divUnrollFactor = 1 -// ), -// // new DivPlugin, -// new CsrPlugin(CsrPluginConfig.all2(0x80000020l).copy(ebreakGen = false, misaExtensionsInit = Riscv.misaToInt("imas"))), -// // new CsrPlugin(//CsrPluginConfig.all2(0x80000020l).copy(ebreakGen = true)/* -// // CsrPluginConfig( -// // catchIllegalAccess = false, -// // mvendorid = null, -// // marchid = null, -// // mimpid = null, -// // mhartid = null, -// // misaExtensionsInit = 0, -// // misaAccess = CsrAccess.READ_ONLY, -// // mtvecAccess = CsrAccess.WRITE_ONLY, -// // mtvecInit = 0x80000020l, -// // mepcAccess = CsrAccess.READ_WRITE, -// // mscratchGen = true, -// // mcauseAccess = CsrAccess.READ_ONLY, -// // mbadaddrAccess = CsrAccess.READ_ONLY, -// // mcycleAccess = CsrAccess.NONE, -// // minstretAccess = CsrAccess.NONE, -// // ecallGen = true, -// // ebreakGen = true, -// // wfiGenAsWait = false, 
-// // wfiGenAsNop = true, -// // ucycleAccess = CsrAccess.NONE -// // )), -// new DebugPlugin(ClockDomain.current.clone(reset = Bool().setName("debugReset"))), -// new BranchPlugin( -// earlyBranch = false, -// catchAddressMisaligned = true, -// fenceiGenAsAJump = false -// ), -// new YamlPlugin("cpu0.yaml") -// ) -// ) -// config -// } - - -// import spinal.core.sim._ -// SimConfig.withConfig(SpinalConfig(mergeAsyncProcess = false, anonymSignalPrefix = "zz_")).allOptimisation.compile(new VexRiscv(configFull)).doSimUntilVoid{ dut => -// dut.clockDomain.forkStimulus(10) -// dut.clockDomain.forkSimSpeedPrinter(4) -// var iBus : InstructionCacheMemBus = null -// -// dut.plugins.foreach{ -// case plugin: IBusCachedPlugin => iBus = plugin.iBus -// case _ => -// } -// dut.clockDomain.onSamplings{ -//// iBus.cmd.ready.randomize() -// iBus.rsp.data #= 0x13 -// } -// } - SpinalConfig().generateVerilog { -// make clean run REDO=10 CSR=no MMU=no COREMARK=no RVF=yes REDO=1 TRACE=yes - val config = GenFull.config +// make clean all REDO=10 CSR=no MMU=no COREMARK=no RVF=no REDO=1 DBUS_LOAD_DATA_WIDTH=64 DBUS_STORE_DATA_WIDTH=64 DEBUG=ye TRACE=ye + val config = VexRiscvConfig( + plugins = List( + new IBusCachedPlugin( + prediction = DYNAMIC, + config = InstructionCacheConfig( + cacheSize = 4096, + bytePerLine =32, + wayCount = 1, + addressWidth = 32, + cpuDataWidth = 32, + memDataWidth = 32, + catchIllegalAccess = true, + catchAccessFault = true, + asyncTagMemory = false, + twoCycleRam = true, + twoCycleCache = true + ), + memoryTranslatorPortConfig = MmuPortConfig( + portTlbSize = 4 + ) + ), + new DBusCachedPlugin( + config = new DataCacheConfig( + cacheSize = 4096, + bytePerLine = 32, + wayCount = 1, + addressWidth = 32, + rfDataWidth = 32, + cpuDataWidth = 64, + memDataWidth = 64, + catchAccessError = true, + catchIllegal = true, + catchUnaligned = true + ), + memoryTranslatorPortConfig = MmuPortConfig( + portTlbSize = 6 + ) + ), + new MmuPlugin( + virtualRange = _(31 downto 
28) === 0xC, + ioRange = _(31 downto 28) === 0xF + ), + new DecoderSimplePlugin( + catchIllegalInstruction = true + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + zeroBoot = false + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false, + executeInsertion = true + ), + new FullBarrelShifterPlugin, + new HazardSimplePlugin( + bypassExecute = true, + bypassMemory = true, + bypassWriteBack = true, + bypassWriteBackBuffer = true, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + new MulPlugin, + new DivPlugin, + new CsrPlugin(CsrPluginConfig.small(0x80000020l)), + new DebugPlugin(ClockDomain.current.clone(reset = Bool().setName("debugReset"))), + new BranchPlugin( + earlyBranch = false, + catchAddressMisaligned = true + ), + new YamlPlugin("cpu0.yaml") + ) + ) config.plugins += new FpuPlugin( externalFpu = false, p = FpuParameter( diff --git a/src/main/scala/vexriscv/VexRiscv.scala b/src/main/scala/vexriscv/VexRiscv.scala index a08f9c9..5f7865c 100644 --- a/src/main/scala/vexriscv/VexRiscv.scala +++ b/src/main/scala/vexriscv/VexRiscv.scala @@ -46,7 +46,6 @@ case class VexRiscvConfig(){ object LEGAL_INSTRUCTION extends Stageable(Bool) object REGFILE_WRITE_VALID extends Stageable(Bool) object REGFILE_WRITE_DATA extends Stageable(Bits(32 bits)) - object DBUS_DATA extends Stageable(Bits(32 bits)) object MPP extends PipelineThing[UInt] object DEBUG_BYPASS_CACHE extends PipelineThing[Bool] diff --git a/src/main/scala/vexriscv/ip/DataCache.scala b/src/main/scala/vexriscv/ip/DataCache.scala index 0e5ca7a..1f76652 100644 --- a/src/main/scala/vexriscv/ip/DataCache.scala +++ b/src/main/scala/vexriscv/ip/DataCache.scala @@ -16,6 +16,7 @@ case class DataCacheConfig(cacheSize : Int, wayCount : Int, addressWidth : Int, cpuDataWidth : Int, + var rfDataWidth : Int = -1, //-1 means cpuDataWidth memDataWidth : Int, catchAccessError : Boolean, catchIllegal : Boolean, @@ -31,10 +32,17 @@ case class 
DataCacheConfig(cacheSize : Int, directTlbHit : Boolean = false, mergeExecuteMemory : Boolean = false, asyncTagMemory : Boolean = false, - aggregationWidth : Int = 0){ + withWriteAggregation : Boolean = false){ + + if(rfDataWidth == -1) rfDataWidth = cpuDataWidth assert(!(mergeExecuteMemory && (earlyDataMux || earlyWaysHits))) assert(!(earlyDataMux && !earlyWaysHits)) assert(isPow2(pendingMax)) + assert(rfDataWidth <= memDataWidth) + + def sizeMax = log2Up(bytePerLine) + def sizeWidth = log2Up(sizeMax + 1) + val aggregationWidth = if(withWriteAggregation) log2Up(memDataBytes+1) else 0 def withWriteResponse = withExclusive def burstSize = bytePerLine*8/memDataWidth val burstLength = bytePerLine/(cpuDataWidth/8) @@ -44,6 +52,7 @@ case class DataCacheConfig(cacheSize : Int, def withExternalLrSc = withLrSc && withExclusive def withExternalAmo = withAmo && withExclusive def cpuDataBytes = cpuDataWidth/8 + def rfDataBytes = rfDataWidth/8 def memDataBytes = memDataWidth/8 def getAxi4SharedConfig() = Axi4Config( addressWidth = addressWidth, @@ -55,6 +64,7 @@ case class DataCacheConfig(cacheSize : Int, useQos = false ) + def getAvalonConfig() = AvalonMMConfig.bursted( addressWidth = addressWidth, dataWidth = memDataWidth, @@ -87,7 +97,7 @@ case class DataCacheConfig(cacheSize : Int, dataWidth = memDataWidth ).addSources(1, BmbSourceParameter( lengthWidth = log2Up(this.bytePerLine), - contextWidth = (if(!withWriteResponse) 1 else 0) + (if(cpuDataWidth != memDataWidth) log2Up(memDataBytes) else 0), + contextWidth = (if(!withWriteResponse) 1 else 0) + aggregationWidth, alignment = BmbParameter.BurstAlignement.LENGTH, canExclusive = withExclusive, withCachedRead = true @@ -120,7 +130,7 @@ case class DataCacheCpuExecute(p : DataCacheConfig) extends Bundle with IMasterS case class DataCacheCpuExecuteArgs(p : DataCacheConfig) extends Bundle{ val wr = Bool - val size = UInt(2 bits) + val size = UInt(log2Up(log2Up(p.cpuDataBytes)+1) bits) val isLrsc = p.withLrSc generate Bool() val 
isAmo = p.withAmo generate Bool() val amoCtrl = p.withAmo generate new Bundle { @@ -174,10 +184,11 @@ case class DataCacheCpuWriteBack(p : DataCacheConfig) extends Bundle with IMaste val mmuException, unalignedAccess, accessError = Bool() val keepMemRspData = Bool() //Used by external AMO to avoid having an internal buffer val fence = FenceFlags() + val exclusiveOk = Bool() override def asMaster(): Unit = { out(isValid,isStuck,isUser, address, fence, storeData) - in(haltIt, data, mmuException, unalignedAccess, accessError, isWrite, keepMemRspData) + in(haltIt, data, mmuException, unalignedAccess, accessError, isWrite, keepMemRspData, exclusiveOk) } } @@ -205,9 +216,18 @@ case class DataCacheMemCmd(p : DataCacheConfig) extends Bundle{ val address = UInt(p.addressWidth bit) val data = Bits(p.cpuDataWidth bits) val mask = Bits(p.cpuDataWidth/8 bits) - val length = UInt(log2Up(p.burstLength) bits) + val size = UInt(p.sizeWidth bits) //... 1 => 2 bytes ... 2 => 4 bytes ... val exclusive = p.withExclusive generate Bool() val last = Bool + +// def beatCountMinusOne = size.muxListDc((0 to p.sizeMax).map(i => i -> U((1 << i)/p.memDataBytes))) +// def beatCount = size.muxListDc((0 to p.sizeMax).map(i => i -> U((1 << i)/p.memDataBytes-1))) + + //Utilities which make quite a few assumptions about the bus utilisation + def byteCountMinusOne = size.muxListDc((0 to p.sizeMax).map(i => i -> U((1 << i)-1, log2Up(p.bytePerLine) bits))) + def beatCountMinusOne = (size === log2Up(p.bytePerLine)) ? U(p.burstSize-1) | U(0) + def beatCount = (size === log2Up(p.bytePerLine)) ? 
U(p.burstSize) | U(1) + def isBurst = size === log2Up(p.bytePerLine) } case class DataCacheMemRsp(p : DataCacheConfig) extends Bundle{ val aggregated = UInt(p.aggregationWidth bits) @@ -267,9 +287,9 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave axi.sharedCmd.write := cmdStage.wr axi.sharedCmd.prot := "010" axi.sharedCmd.cache := "1111" - axi.sharedCmd.size := log2Up(p.memDataWidth/8) + axi.sharedCmd.size := cmd.size.max(log2Up(p.memDataBytes)) axi.sharedCmd.addr := cmdStage.address - axi.sharedCmd.len := cmdStage.length.resized + axi.sharedCmd.len := cmd.beatCountMinusOne.resized axi.writeData.arbitrationFrom(dataStage) axi.writeData.data := dataStage.data @@ -293,7 +313,7 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave mm.read := cmd.valid && !cmd.wr mm.write := cmd.valid && cmd.wr mm.address := cmd.address(cmd.address.high downto log2Up(p.memDataWidth/8)) @@ U(0,log2Up(p.memDataWidth/8) bits) - mm.burstCount := cmd.length + U(1, widthOf(mm.burstCount) bits) + mm.burstCount := cmd.beatCount mm.byteEnable := cmd.mask mm.writeData := cmd.data @@ -302,23 +322,25 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave rsp.data := mm.readData rsp.error := mm.response =/= AvalonMM.Response.OKAY + assert(p.cpuDataWidth == p.rfDataWidth) mm } def toWishbone(): Wishbone = { + assert(p.cpuDataWidth == p.rfDataWidth) val wishboneConfig = p.getWishboneConfig() val bus = Wishbone(wishboneConfig) val counter = Reg(UInt(log2Up(p.burstSize) bits)) init(0) val cmdBridge = Stream (DataCacheMemCmd(p)) - val isBurst = cmdBridge.length =/= 0 + val isBurst = cmdBridge.isBurst cmdBridge.valid := cmd.valid cmdBridge.address := (isBurst ? 
(cmd.address(31 downto widthOf(counter) + 2) @@ counter @@ U"00") | (cmd.address(31 downto 2) @@ U"00")) cmdBridge.wr := cmd.wr cmdBridge.mask := cmd.mask cmdBridge.data := cmd.data - cmdBridge.length := cmd.length - cmdBridge.last := counter === cmd.length + cmdBridge.size := cmd.size + cmdBridge.last := !isBurst || counter === p.burstSize-1 cmd.ready := cmdBridge.ready && (cmdBridge.wr || cmdBridge.last) @@ -351,6 +373,7 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave def toPipelinedMemoryBus(): PipelinedMemoryBus = { val bus = PipelinedMemoryBus(32,32) + assert(p.cpuDataWidth == p.rfDataWidth) val counter = Reg(UInt(log2Up(p.burstSize) bits)) init(0) when(bus.cmd.fire){ counter := counter + 1 } @@ -361,7 +384,7 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave bus.cmd.write := cmd.wr bus.cmd.mask := cmd.mask bus.cmd.data := cmd.data - cmd.ready := bus.cmd.ready && (cmd.wr || counter === cmd.length) + cmd.ready := bus.cmd.ready && (cmd.wr || counter === p.burstSize-1) rsp.valid := bus.rsp.valid rsp.data := bus.rsp.payload.data rsp.error := False @@ -374,14 +397,16 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave setCompositeName(DataCacheMemBus.this, "Bridge", true) val pipelinedMemoryBusConfig = p.getBmbParameter() val bus = Bmb(pipelinedMemoryBusConfig).setCompositeName(this,"toBmb", true) - val aggregationMax = p.memDataBytes case class Context() extends Bundle{ val isWrite = !p.withWriteResponse generate Bool() - val rspCount = (p.cpuDataWidth != p.memDataWidth) generate UInt(log2Up(aggregationMax) bits) + val rspCount = (p.aggregationWidth != 0) generate UInt(p.aggregationWidth bits) } - val withoutWriteBuffer = if(p.cpuDataWidth == p.memDataWidth) new Area { + + def sizeToLength(size : UInt) = size.muxListDc((0 to log2Up(p.cpuDataBytes)).map(i => U(i) -> U((1 << i)-1, log2Up(p.cpuDataBytes) bits))) + + val withoutWriteBuffer = if(p.aggregationWidth == 0) 
new Area { val busCmdContext = Context() bus.cmd.valid := cmd.valid @@ -389,7 +414,7 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave bus.cmd.opcode := (cmd.wr ? B(Bmb.Cmd.Opcode.WRITE) | B(Bmb.Cmd.Opcode.READ)) bus.cmd.address := cmd.address.resized bus.cmd.data := cmd.data - bus.cmd.length := (cmd.length << 2) | 3 + bus.cmd.length := cmd.byteCountMinusOne bus.cmd.mask := cmd.mask if (p.withExclusive) bus.cmd.exclusive := cmd.exclusive if (!p.withWriteResponse) busCmdContext.isWrite := cmd.wr @@ -399,7 +424,7 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave if(p.withInvalidate) sync.arbitrationFrom(bus.sync) } - val withWriteBuffer = if(p.cpuDataWidth != p.memDataWidth) new Area { + val withWriteBuffer = if(p.aggregationWidth != 0) new Area { val buffer = new Area { val stream = cmd.toEvent().m2sPipe() val address = Reg(UInt(p.addressWidth bits)) @@ -413,7 +438,7 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave val aggregationRange = log2Up(p.memDataWidth/8)-1 downto log2Up(p.cpuDataWidth/8) val tagRange = p.addressWidth-1 downto aggregationRange.high+1 val aggregationEnabled = Reg(Bool) - val aggregationCounter = Reg(UInt(log2Up(aggregationMax) bits)) init(0) + val aggregationCounter = Reg(UInt(p.aggregationWidth bits)) init(0) val aggregationCounterFull = aggregationCounter === aggregationCounter.maxValue val timer = Reg(UInt(log2Up(timeoutCycles)+1 bits)) init(0) val timerFull = timer.msb @@ -467,7 +492,7 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave when(cmd.fire){ buffer.write := cmd.wr buffer.address := cmd.address.resized - buffer.length := (cmd.length << 2) | 3 + buffer.length := cmd.byteCountMinusOne if (p.withExclusive) buffer.exclusive := cmd.exclusive when(cmd.wr && !cmd.uncached && !cmdExclusive){ @@ -484,7 +509,7 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave rsp.aggregated 
:= rspCtx.rspCount val syncLogic = p.withInvalidate generate new Area{ - val cmdCtx = Stream(UInt(log2Up(aggregationMax) bits)) + val cmdCtx = Stream(UInt(p.aggregationWidth bits)) cmdCtx.valid := bus.cmd.fire && bus.cmd.isWrite cmdCtx.payload := aggregationCounter halt setWhen(!cmdCtx.ready) @@ -563,6 +588,7 @@ class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParam val memWordRange = log2Up(bytePerLine)-1 downto log2Up(bytePerMemWord) val hitRange = tagRange.high downto lineRange.low val memWordToCpuWordRange = log2Up(bytePerMemWord)-1 downto log2Up(bytePerWord) + val cpuWordToRfWordRange = log2Up(bytePerWord)-1 downto log2Up(p.rfDataBytes) class LineInfo() extends Bundle{ @@ -721,11 +747,15 @@ class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParam val stage0 = new Area{ - val mask = io.cpu.execute.size.mux ( - U(0) -> B"0001", - U(1) -> B"0011", - default -> B"1111" - ) |<< io.cpu.execute.address(1 downto 0) +// val mask = io.cpu.execute.size.mux ( +// U(0) -> B"0001", +// U(1) -> B"0011", +// default -> B"1111" +// ) |<< io.cpu.execute.address(1 downto 0) + + val mask = io.cpu.execute.size.muxListDc((0 to log2Up(p.cpuDataBytes)).map(i => U(i) -> B((1 << (1 << i)) -1, p.cpuDataBytes bits))) |<< io.cpu.execute.address(log2Up(p.cpuDataBytes)-1 downto 0) + + val dataColisions = collisionProcess(io.cpu.execute.address(lineRange.high downto cpuWordRange.low), mask) val wayInvalidate = B(0, wayCount bits) //Used if invalidate enabled @@ -792,7 +822,8 @@ class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParam val wayInvalidate = stagePipe(stageA. 
wayInvalidate) val consistancyHazard = if(stageA.consistancyCheck != null) stagePipe(stageA.consistancyCheck.hazard) else False val dataColisions = stagePipe(stageA.dataColisions) - val unaligned = if(!catchUnaligned) False else stagePipe((stageA.request.size === 2 && io.cpu.memory.address(1 downto 0) =/= 0) || (stageA.request.size === 1 && io.cpu.memory.address(0 downto 0) =/= 0)) +// val unaligned = if(!catchUnaligned) False else stagePipe((stageA.request.size === 2 && io.cpu.memory.address(1 downto 0) =/= 0) || (stageA.request.size === 1 && io.cpu.memory.address(0 downto 0) =/= 0)) + val unaligned = if(!catchUnaligned) False else stagePipe((1 to log2Up(p.cpuDataBytes)).map(i => stageA.request.size === i && io.cpu.memory.address(i-1 downto 0) =/= 0).orR) val waysHitsBeforeInvalidate = if(earlyWaysHits) stagePipe(B(stageA.wayHits)) else B(tagsReadRsp.map(tag => mmuRsp.physicalAddress(tagRange) === tag.address && tag.valid).asBits()) val waysHits = waysHitsBeforeInvalidate & ~wayInvalidate val waysHit = waysHits.orR @@ -848,8 +879,9 @@ class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParam val requestDataBypass = CombInit(io.cpu.writeBack.storeData) import DataCacheExternalAmoStates._ val amo = withAmo generate new Area{ - def rf = io.cpu.writeBack.storeData - def mem = if(withInternalAmo) dataMux else ioMemRspMuxed + def rf = io.cpu.writeBack.storeData(p.rfDataWidth-1 downto 0) + def memLarger = if(withInternalAmo) dataMux else ioMemRspMuxed + def mem = memLarger.subdivideIn(rfDataWidth bits).read(io.cpu.writeBack.address(cpuWordToRfWordRange)) val compare = request.amoCtrl.alu.msb val unsigned = request.amoCtrl.alu(2 downto 1) === B"11" val addSub = (rf.asSInt + Mux(compare, ~mem, mem).asSInt + Mux(compare, S(1), S(0))).asBits @@ -898,13 +930,13 @@ class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParam io.mem.cmd.valid := False - io.mem.cmd.address := mmuRsp.physicalAddress(tagRange.high downto cpuWordRange.low) 
@@ U(0, cpuWordRange.low bits) - io.mem.cmd.length := 0 + io.mem.cmd.address := mmuRsp.physicalAddress io.mem.cmd.last := True io.mem.cmd.wr := request.wr io.mem.cmd.mask := mask io.mem.cmd.data := requestDataBypass io.mem.cmd.uncached := mmuRsp.isIoAccess + io.mem.cmd.size := request.size.resized if(withExternalLrSc) io.mem.cmd.exclusive := request.isLrsc || isAmo @@ -962,8 +994,6 @@ class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParam //Write through io.mem.cmd.valid setWhen(request.wr) - io.mem.cmd.address := mmuRsp.physicalAddress(tagRange.high downto cpuWordRange.low) @@ U(0, cpuWordRange.low bits) - io.mem.cmd.length := 0 io.cpu.writeBack.haltIt clearWhen(!request.wr || io.mem.cmd.ready) if(withInternalAmo) when(isAmo){ @@ -989,8 +1019,8 @@ class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParam //Emit cmd io.mem.cmd.valid setWhen(!memCmdSent) io.mem.cmd.wr := False - io.mem.cmd.address := mmuRsp.physicalAddress(tagRange.high downto lineRange.low) @@ U(0,lineRange.low bit) - io.mem.cmd.length := p.burstLength-1 + io.mem.cmd.address(0, lineRange.low bits) := 0 + io.mem.cmd.size := log2Up(p.bytePerLine) loaderValid setWhen(io.mem.cmd.ready) } @@ -1006,15 +1036,18 @@ class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParam if(catchAccessError) io.cpu.writeBack.accessError := (waysHits & B(tagsReadRsp.map(_.error))) =/= 0 || (loadStoreFault && !mmuRsp.isPaging) } - if(withLrSc) when(request.isLrsc && request.wr){ + if(withLrSc) { val success = if(withInternalLrSc)lrSc.reserved else io.mem.rsp.exclusive - io.cpu.writeBack.data := B(!success).resized - if(withExternalLrSc) when(io.cpu.writeBack.isValid && io.mem.rsp.valid && rspSync && success && waysHit){ - cpuWriteToCache := True + io.cpu.writeBack.exclusiveOk := success + when(request.isLrsc && request.wr){ + // io.cpu.writeBack.data := B(!success).resized + if(withExternalLrSc) when(io.cpu.writeBack.isValid && io.mem.rsp.valid && 
rspSync && success && waysHit){ + cpuWriteToCache := True + } } } if(withAmo) when(request.isAmo){ - requestDataBypass := amo.resultReg + requestDataBypass.subdivideIn(p.rfDataWidth bits).foreach(_ := amo.resultReg) } //remove side effects on exceptions diff --git a/src/main/scala/vexriscv/ip/fpu/FpuCore.scala b/src/main/scala/vexriscv/ip/fpu/FpuCore.scala index c674bfe..df953f3 100644 --- a/src/main/scala/vexriscv/ip/fpu/FpuCore.scala +++ b/src/main/scala/vexriscv/ip/fpu/FpuCore.scala @@ -275,13 +275,13 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ output.rs1.setNanQuiet output.rs1.sign := False } - when(s1.format === FpuFormat.FLOAT =/= rs2Entry.boxed) { - output.rs2.setNanQuiet - output.rs2.sign := False - } - when(s1.format === FpuFormat.FLOAT =/= rs3Entry.boxed) { - output.rs3.setNanQuiet - } + } + when(s1.format === FpuFormat.FLOAT =/= rs2Entry.boxed) { + output.rs2.setNanQuiet + output.rs2.sign := False + } + when(s1.format === FpuFormat.FLOAT =/= rs3Entry.boxed) { + output.rs3.setNanQuiet } } } @@ -733,7 +733,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ val sgnjRs1Sign = CombInit(input.rs1.sign) val sgnjRs2Sign = CombInit(input.rs2.sign) if(p.withDouble){ - sgnjRs1Sign setWhen(input.rs1Boxed && input.format === FpuFormat.DOUBLE) sgnjRs2Sign setWhen(input.rs2Boxed && input.format === FpuFormat.DOUBLE) } val sgnjResult = (sgnjRs1Sign && input.arg(1)) ^ sgnjRs2Sign ^ input.arg(0) @@ -786,22 +785,12 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ } } is(FpuOpcode.SGNJ){ - rfOutput.value.sign := sgnjResult - if(p.withDouble) when(input.format === FpuFormat.DOUBLE){ - when(input.rs1Boxed){ - rfOutput.value.sign := input.rs1.sign - rfOutput.format := FpuFormat.FLOAT - } -// //kill boxing => F32 -> F64 NAN -// when(input.rs1Boxed && !sgnjResult){ -// rfOutput.value.setNan -// rfOutput.value.mantissa.setAll() -// rfOutput.value.mantissa(31 downto 0) := input.rs1.sign ## 
input.rs1.exponent -// } -// //Spawn boxing => F64 NAN -> F32 -// when(!input.rs1Boxed && input.rs1.exponent === exponentOne + 1024 && input.rs1.mantissa(32, 52-32 bits).andR && sgnjResult){ -// -// } + when(!input.rs1.isNan) { + rfOutput.value.sign := sgnjResult + } + if(p.withDouble) when(input.rs1Boxed && input.format === FpuFormat.DOUBLE){ + rfOutput.value.sign := input.rs1.sign + rfOutput.format := FpuFormat.FLOAT } } if(p.withDouble) is(FpuOpcode.FCVT_X_X){ diff --git a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala index a9562d0..2014571 100644 --- a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala +++ b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala @@ -6,6 +6,8 @@ import spinal.core._ import spinal.lib._ import spinal.lib.bus.amba4.axi.Axi4 +import scala.collection.mutable.ArrayBuffer + class DAxiCachedPlugin(config : DataCacheConfig, memoryTranslatorPortConfig : Any = null) extends DBusCachedPlugin(config, memoryTranslatorPortConfig) { var dAxi : Axi4 = null @@ -22,6 +24,7 @@ trait DBusEncodingService { def addLoadWordEncoding(key: MaskedLiteral): Unit def addStoreWordEncoding(key: MaskedLiteral): Unit def bypassStore(data : Bits) : Unit + def loadData() : Bits } class DBusCachedPlugin(val config : DataCacheConfig, @@ -90,10 +93,15 @@ class DBusCachedPlugin(val config : DataCacheConfig, ) } + val bypassStoreList = ArrayBuffer[(Bool, Bits)]() + override def bypassStore(data: Bits): Unit = { - pipeline.stages.last.input(MEMORY_STORE_DATA) := data + bypassStoreList += ConditionalContext.isTrue() -> data } + + override def loadData(): Bits = pipeline.stages.last.output(MEMORY_LOAD_DATA) + object MEMORY_ENABLE extends Stageable(Bool) object MEMORY_MANAGMENT extends Stageable(Bool) object MEMORY_WR extends Stageable(Bool) @@ -104,7 +112,9 @@ class DBusCachedPlugin(val config : DataCacheConfig, object MEMORY_FORCE_CONSTISTENCY extends Stageable(Bool) object IS_DBUS_SHARING extends 
Stageable(Bool()) object MEMORY_VIRTUAL_ADDRESS extends Stageable(UInt(32 bits)) - object MEMORY_STORE_DATA extends Stageable(Bits(32 bits)) + object MEMORY_STORE_DATA_RF extends Stageable(Bits(config.rfDataWidth bits)) +// object MEMORY_STORE_DATA_CPU extends Stageable(Bits(config.cpuDataWidth bits)) + object MEMORY_LOAD_DATA extends Stageable(Bits(config.cpuDataWidth bits)) override def setup(pipeline: VexRiscv): Unit = { import Riscv._ @@ -292,12 +302,12 @@ class DBusCachedPlugin(val config : DataCacheConfig, cache.io.cpu.execute.isValid := arbitration.isValid && input(MEMORY_ENABLE) cache.io.cpu.execute.address := input(SRC_ADD).asUInt cache.io.cpu.execute.args.wr := input(MEMORY_WR) - insert(MEMORY_STORE_DATA) := size.mux( + insert(MEMORY_STORE_DATA_RF) := size.mux( U(0) -> input(RS2)( 7 downto 0) ## input(RS2)( 7 downto 0) ## input(RS2)(7 downto 0) ## input(RS2)(7 downto 0), U(1) -> input(RS2)(15 downto 0) ## input(RS2)(15 downto 0), default -> input(RS2)(31 downto 0) ) - cache.io.cpu.execute.args.size := size + cache.io.cpu.execute.args.size := size.resized if(twoStageMmu) { mmuBus.cmd(0).isValid := cache.io.cpu.execute.isValid @@ -358,13 +368,16 @@ class DBusCachedPlugin(val config : DataCacheConfig, } val managementStage = stages.last - managementStage plug new Area{ + val mgs = managementStage plug new Area{ import managementStage._ cache.io.cpu.writeBack.isValid := arbitration.isValid && input(MEMORY_ENABLE) cache.io.cpu.writeBack.isStuck := arbitration.isStuck cache.io.cpu.writeBack.isUser := (if(privilegeService != null) privilegeService.isUser() else False) cache.io.cpu.writeBack.address := U(input(REGFILE_WRITE_DATA)) - cache.io.cpu.writeBack.storeData := input(MEMORY_STORE_DATA) + cache.io.cpu.writeBack.storeData.subdivideIn(32 bits).foreach(_ := input(MEMORY_STORE_DATA_RF)) + for((cond, value) <- bypassStoreList) when(cond){ + cache.io.cpu.writeBack.storeData := value + } val fence = if(withInvalidate) new Area { cache.io.cpu.writeBack.fence := 
input(INSTRUCTION)(31 downto 20).as(FenceFlags()) @@ -425,12 +438,15 @@ class DBusCachedPlugin(val config : DataCacheConfig, arbitration.haltItself.setWhen(cache.io.cpu.writeBack.isValid && cache.io.cpu.writeBack.haltIt) - val rspShifted = Bits(32 bits) - rspShifted := cache.io.cpu.writeBack.data + val rspRf = cache.io.cpu.writeBack.data.subdivideIn(32 bits).read(cache.io.cpu.writeBack.address(cache.cpuWordToRfWordRange)) + val rspShifted = CombInit(rspRf) switch(input(MEMORY_ADDRESS_LOW)){ - is(1){rspShifted(7 downto 0) := cache.io.cpu.writeBack.data(15 downto 8)} - is(2){rspShifted(15 downto 0) := cache.io.cpu.writeBack.data(31 downto 16)} - is(3){rspShifted(7 downto 0) := cache.io.cpu.writeBack.data(31 downto 24)} + is(1){rspShifted(7 downto 0) := rspRf(15 downto 8)} + is(2){rspShifted(15 downto 0) := rspRf(31 downto 16)} + is(3){rspShifted(7 downto 0) := rspRf(31 downto 24)} + } + if(withLrSc) when(input(MEMORY_LRSC) && input(MEMORY_WR)){ + rspShifted := B(!cache.io.cpu.writeBack.exclusiveOk).resized } val rspFormated = input(INSTRUCTION)(13 downto 12).mux( @@ -443,7 +459,7 @@ class DBusCachedPlugin(val config : DataCacheConfig, output(REGFILE_WRITE_DATA) := rspFormated } - insert(DBUS_DATA) := cache.io.cpu.writeBack.data + insert(MEMORY_LOAD_DATA) := cache.io.cpu.writeBack.data } //Share access to the dBus (used by self refilled MMU) @@ -458,11 +474,11 @@ class DBusCachedPlugin(val config : DataCacheConfig, cache.io.cpu.execute.isValid := True dBusAccess.cmd.ready := !execute.arbitration.isStuck } - cache.io.cpu.execute.args.wr := dBusAccess.cmd.write - execute.insert(MEMORY_STORE_DATA) := dBusAccess.cmd.data - cache.io.cpu.execute.args.size := dBusAccess.cmd.size - if(withLrSc) cache.io.cpu.execute.args.isLrsc := False - if(withAmo) cache.io.cpu.execute.args.isAmo := False + cache.io.cpu.execute.args.wr := False //dBusAccess.cmd.write +// execute.insert(MEMORY_STORE_DATA_RF) := dBusAccess.cmd.data //Not implemented + cache.io.cpu.execute.args.size := 
dBusAccess.cmd.size.resized + if(withLrSc) execute.input(MEMORY_LRSC) := False + if(withAmo) execute.input(MEMORY_AMO) := False cache.io.cpu.execute.address := dBusAccess.cmd.address //Will only be 12 muxes forceDatapath := True } @@ -474,7 +490,7 @@ class DBusCachedPlugin(val config : DataCacheConfig, if(mmuAndBufferStage != execute) (cache.io.cpu.memory.isValid setWhen(mmuAndBufferStage.input(IS_DBUS_SHARING))) cache.io.cpu.writeBack.isValid setWhen(managementStage.input(IS_DBUS_SHARING)) dBusAccess.rsp.valid := managementStage.input(IS_DBUS_SHARING) && !cache.io.cpu.writeBack.isWrite && (cache.io.cpu.redo || !cache.io.cpu.writeBack.haltIt) - dBusAccess.rsp.data := cache.io.cpu.writeBack.data + dBusAccess.rsp.data := mgs.rspRf dBusAccess.rsp.error := cache.io.cpu.writeBack.unalignedAccess || cache.io.cpu.writeBack.accessError dBusAccess.rsp.redo := cache.io.cpu.redo component.addPrePopTask{() => diff --git a/src/main/scala/vexriscv/plugin/FpuPlugin.scala b/src/main/scala/vexriscv/plugin/FpuPlugin.scala index 855c397..a7454ea 100644 --- a/src/main/scala/vexriscv/plugin/FpuPlugin.scala +++ b/src/main/scala/vexriscv/plugin/FpuPlugin.scala @@ -146,6 +146,10 @@ class FpuPlugin(externalFpu : Boolean = false, val dBusEncoding = pipeline.service(classOf[DBusEncodingService]) dBusEncoding.addLoadWordEncoding(FLW) dBusEncoding.addStoreWordEncoding(FSW) + if(p.withDouble) { + dBusEncoding.addLoadWordEncoding(FLD) + dBusEncoding.addStoreWordEncoding(FSD) + } } override def build(pipeline: VexRiscv): Unit = { @@ -235,7 +239,7 @@ class FpuPlugin(externalFpu : Boolean = false, when(isRsp){ when(arbitration.isValid) { dBusEncoding.bypassStore(port.rsp.value) - output(REGFILE_WRITE_DATA) := port.rsp.value + output(REGFILE_WRITE_DATA) := port.rsp.value(31 downto 0) } when(!port.rsp.valid){ arbitration.haltByOther := True @@ -247,7 +251,8 @@ class FpuPlugin(externalFpu : Boolean = false, // Manage $load val commit = Stream(FpuCommit(p)) commit.valid := isCommit && 
!arbitration.isStuck - commit.value := (input(FPU_COMMIT_LOAD) ? output(DBUS_DATA) | input(RS1)) + commit.value(31 downto 0) := (input(FPU_COMMIT_LOAD) ? dBusEncoding.loadData()(31 downto 0) | input(RS1)) + if(p.withDouble) commit.value(63 downto 32) := dBusEncoding.loadData()(63 downto 32) commit.write := arbitration.isValid && !arbitration.removeIt commit.sync := input(FPU_COMMIT_SYNC) diff --git a/src/test/cpp/regression/main.cpp b/src/test/cpp/regression/main.cpp index 9bfa886..6f1dcff 100644 --- a/src/test/cpp/regression/main.cpp +++ b/src/test/cpp/regression/main.cpp @@ -4,7 +4,7 @@ #include "VVexRiscv_RiscvCore.h" #endif #include "verilated.h" -#include "verilated_vcd_c.h" +#include "verilated_fst_c.h" #include #include #include @@ -175,8 +175,12 @@ void loadBinImpl(string path,Memory* mem, uint32_t offset) { #define TEXTIFY(A) #A +void breakMe(){ + int a = 0; +} #define assertEq(x,ref) if(x != ref) {\ printf("\n*** %s is %d but should be %d ***\n\n",TEXTIFY(x),x,ref);\ + breakMe();\ throw std::exception();\ } @@ -1106,7 +1110,7 @@ public: uint32_t bootPc = -1; uint32_t iStall = STALL,dStall = STALL; #ifdef TRACE - VerilatedVcdC* tfp; + VerilatedFstC* tfp; #endif bool allowInvalidate = true; @@ -1129,13 +1133,13 @@ public: class MemWrite { public: int32_t address, size; - uint32_t data; + uint8_t data42[64]; }; class MemRead { public: int32_t address, size; - uint32_t data; + uint8_t data42[64]; bool error; }; @@ -1186,7 +1190,10 @@ public: cout << " DUT : address=" << t.address << " size=" << t.size << endl; fail(); } - *data = t.data; + + for(int i = 0; i < size; i++){ + ((uint8_t*)data)[i] = t.data42[i]; + } periphRead.pop(); return t.error; }else { @@ -1205,10 +1212,8 @@ public: MemWrite w; w.address = address; w.size = size; - switch(size){ - case 1: w.data = data & 0xFF; break; - case 2: w.data = data & 0xFFFF; break; - case 4: w.data = data; break; + for(int i = 0; i < size; i++){ + w.data42[i] = ((uint8_t*)&data)[i]; } periphWritesGolden.push(w); 
if(periphWritesGolden.size() > 10){ @@ -1231,10 +1236,12 @@ public: case 0: MemWrite t = periphWrites.front(); MemWrite t2 = periphWritesGolden.front(); - if(t.address != t2.address || t.size != t2.size || t.data != t2.data){ + bool dataMatch = true; + for(int i = 0;i < min(t.size, t2.size);i++) dataMatch &= t.data42[i] == t2.data42[i]; + if(t.address != t2.address || t.size != t2.size || !dataMatch){ cout << hex << "periphWrite missmatch" << endl; - cout << " DUT address=" << t.address << " size=" << t.size << " data=" << t.data << endl; - cout << " REF address=" << t2.address << " size=" << t2.size << " data=" << t2.data << endl; + cout << " DUT address=" << t.address << " size=" << t.size << " data=" << *((uint32_t*)t.data42) << endl; + cout << " REF address=" << t2.address << " size=" << t2.size << " data=" << *((uint32_t*)t2.data42) << endl; fail(); } periphWrites.pop(); @@ -1345,43 +1352,19 @@ public: virtual bool isDBusCheckedRegion(uint32_t address){ return isPerifRegion(address);} - virtual void dBusAccess(uint32_t addr,bool wr, uint32_t size,uint32_t mask, uint32_t *data, bool *error) { - assertEq(addr % (1 << size), 0); + virtual void dBusAccess(uint32_t addr,bool wr, uint32_t size, uint8_t *data, bool *error) { + assertEq(addr % size, 0); if(!isPerifRegion(addr)) { if(wr){ - memTraces << - #ifdef TRACE_WITH_TIME - (currentTime - #ifdef REF - -2 - #endif - ) << - #endif - " : WRITE mem" << hex << (1 << size) << "[" << addr << "] = " << *data << dec << endl; - for(uint32_t b = 0;b < (1 << size);b++){ - uint32_t offset = (addr+b)&0x3; - if((mask >> offset) & 1 == 1) - *mem.get(addr + b) = *data >> (offset*8); + for(uint32_t b = 0;b < size;b++){ + *mem.get(addr + b) = ((uint8_t*)data)[b]; } }else{ - *data = VL_RANDOM_I(32); - for(uint32_t b = 0;b < (1 << size);b++){ - uint32_t offset = (addr+b)&0x3; - *data &= ~(0xFF << (offset*8)); - *data |= mem[addr + b] << (offset*8); + uint32_t innerOffset = addr & (DBUS_LOAD_DATA_WIDTH/8-1); + for(uint32_t b = 0;b < 
size;b++){ + ((uint8_t*)data)[b] = mem[addr + b]; } - /* - memTraces << - #ifdef TRACE_WITH_TIME - (currentTime - #ifdef REF - -2 - #endif - ) << - #endif - " : READ mem" << (1 << size) << "[" << addr << "] = " << *data << endl;*/ - } } @@ -1390,21 +1373,9 @@ public: if(isDBusCheckedRegion(addr)){ CpuRef::MemWrite w; w.address = addr; - while((mask & 1) == 0){ - mask >>= 1; - w.address++; - w.data >>= 8; - } - switch(mask){ - case 1: size = 0; break; - case 3: size = min(1u, size); break; - case 15: size = min(2u, size); break; - } - w.size = 1 << size; - switch(size){ - case 0: w.data = *data & 0xFF; break; - case 1: w.data = *data & 0xFFFF; break; - case 2: w.data = *data ; break; + w.size = size; + for(uint32_t b = 0;b < size;b++){ + w.data42[b] = data[b]; } riscvRef.periphWrites.push(w); } @@ -1412,8 +1383,10 @@ public: if(isPerifRegion(addr)){ CpuRef::MemRead r; r.address = addr; - r.size = 1 << size; - r.data = *data; + r.size = size; + for(uint32_t b = 0;b < size;b++){ + r.data42[b] = data[b]; + } r.error = *error; riscvRef.periphRead.push(r); } @@ -1461,9 +1434,9 @@ public: // init trace dump #ifdef TRACE Verilated::traceEverOn(true); - tfp = new VerilatedVcdC; + tfp = new VerilatedFstC; top->trace(tfp, 99); - tfp->open((vcdName + ".vcd").c_str()); + tfp->open((vcdName + ".fst").c_str()); #endif // Reset @@ -1725,7 +1698,8 @@ public: virtual void dutPutChar(char c){} - virtual void dBusAccess(uint32_t addr,bool wr, uint32_t size,uint32_t mask, uint32_t *data, bool *error) { + virtual void dBusAccess(uint32_t addr,bool wr, uint32_t size, uint8_t *dataBytes, bool *error) { + uint32_t *data = ((uint32_t*)dataBytes); if(wr){ switch(addr){ case 0xF0010000u: { @@ -1788,19 +1762,10 @@ public: case 0xF00FFF4Cu: *data = mTimeCmp >> 32; break; case 0xF0010004u: *data = ~0; break; } - memTraces << - #ifdef TRACE_WITH_TIME - (currentTime - #ifdef REF - -2 - #endif - ) << - #endif - " : READ mem" << (1 << size) << "[" << addr << "] = " << *data << endl; } *error = addr 
== 0xF00FFF60u; - Workspace::dBusAccess(addr,wr,size,mask,data,error); + Workspace::dBusAccess(addr,wr,size,dataBytes,error); } @@ -2195,7 +2160,7 @@ public: if (top->dBus_cmd_valid && top->dBus_cmd_ready) { pending = true; data_next = top->dBus_cmd_payload_data; - ws->dBusAccess(top->dBus_cmd_payload_address,top->dBus_cmd_payload_wr,top->dBus_cmd_payload_size,0xF,&data_next,&error_next); + ws->dBusAccess(top->dBus_cmd_payload_address,top->dBus_cmd_payload_wr,1 << top->dBus_cmd_payload_size,((uint8_t*)&data_next) + (top->dBus_cmd_payload_address & 3),&error_next); } } @@ -2370,7 +2335,7 @@ public: #include struct DBusCachedTask{ - char data[DBUS_DATA_WIDTH/8]; + char data[DBUS_LOAD_DATA_WIDTH/8]; bool error; bool last; bool exclusive; @@ -2407,12 +2372,14 @@ public: virtual void preCycle(){ if (top->dBus_cmd_valid && top->dBus_cmd_ready) { if(top->dBus_cmd_payload_wr){ + int size = 1 << top->dBus_cmd_payload_size; #ifdef DBUS_INVALIDATE pendingSync += 1; #endif #ifndef DBUS_EXCLUSIVE bool error; - ws->dBusAccess(top->dBus_cmd_payload_address,1,2,top->dBus_cmd_payload_mask,&top->dBus_cmd_payload_data,&error); + int shift = top->dBus_cmd_payload_address & (DBUS_STORE_DATA_WIDTH/8-1); + ws->dBusAccess(top->dBus_cmd_payload_address,1,size,((uint8_t*)&top->dBus_cmd_payload_data) + shift,&error); #else bool cancel = false, error = false; if(top->dBus_cmd_payload_exclusive){ @@ -2424,31 +2391,28 @@ public: if(!cancel) { for(int idx = 0;idx < 1;idx++){ bool localError = false; - ws->dBusAccess(top->dBus_cmd_payload_address+idx*4,1,2,top->dBus_cmd_payload_mask >> idx*4,((uint32_t*)&top->dBus_cmd_payload_data)+idx, &localError); + int shift = top->dBus_cmd_payload_address & (DBUS_STORE_DATA_WIDTH/8-1); + ws->dBusAccess(top->dBus_cmd_payload_address,1,size,((uint8_t*)&top->dBus_cmd_payload_data) + shift,&localError); error |= localError; - - //printf("%d ", (int)localError); } } - // printf("%x %d\n", top->dBus_cmd_payload_address, (int)error); rsp.last = true; rsp.error = 
error; rsps.push(rsp); #endif } else { bool error = false; - uint32_t beatCount = top->dBus_cmd_payload_length*32/DBUS_DATA_WIDTH; + uint32_t beatCount = (((1 << top->dBus_cmd_payload_size)*8+DBUS_LOAD_DATA_WIDTH-1) / DBUS_LOAD_DATA_WIDTH)-1; + uint32_t startAt = top->dBus_cmd_payload_address; + uint32_t endAt = top->dBus_cmd_payload_address + (1 << top->dBus_cmd_payload_size); + uint32_t address = top->dBus_cmd_payload_address & ~(DBUS_LOAD_DATA_WIDTH/8-1); + uint8_t buffer[64]; + ws->dBusAccess(top->dBus_cmd_payload_address,0,1 << top->dBus_cmd_payload_size,buffer, &error); for(int beat = 0;beat <= beatCount;beat++){ - if(top->dBus_cmd_payload_length == 0){ - uint32_t sel = (top->dBus_cmd_payload_address >> 2) & (DBUS_DATA_WIDTH/32-1); - ws->dBusAccess(top->dBus_cmd_payload_address,0,2,0,((uint32_t*)rsp.data) + sel,&error); - } else { - for(int idx = 0;idx < DBUS_DATA_WIDTH/32;idx++){ - bool localError = false; - ws->dBusAccess(top->dBus_cmd_payload_address + beat * DBUS_DATA_WIDTH/8 + idx*4,0,2,0,((uint32_t*)rsp.data)+idx, &localError); - error |= localError; - } + for(int i = 0;i < DBUS_LOAD_DATA_WIDTH/8;i++){ + rsp.data[i] = (address >= startAt && address < endAt) ? 
buffer[address-top->dBus_cmd_payload_address] : VL_RANDOM_I(8); + address += 1; } rsp.last = beat == beatCount; #ifdef DBUS_EXCLUSIVE @@ -2485,7 +2449,7 @@ public: rsps.pop(); top->dBus_rsp_valid = 1; top->dBus_rsp_payload_error = rsp.error; - for(int idx = 0;idx < DBUS_DATA_WIDTH/32;idx++){ + for(int idx = 0;idx < DBUS_LOAD_DATA_WIDTH/32;idx++){ ((uint32_t*)&top->dBus_rsp_payload_data)[idx] = ((uint32_t*)rsp.data)[idx]; } top->dBus_rsp_payload_last = rsp.last; @@ -2494,7 +2458,7 @@ public: #endif } else{ top->dBus_rsp_valid = 0; - for(int idx = 0;idx < DBUS_DATA_WIDTH/32;idx++){ + for(int idx = 0;idx < DBUS_LOAD_DATA_WIDTH/32;idx++){ ((uint32_t*)&top->dBus_rsp_payload_data)[idx] = VL_RANDOM_I(32); } top->dBus_rsp_payload_error = VL_RANDOM_I(1); @@ -3092,12 +3056,13 @@ public: } - virtual void dBusAccess(uint32_t addr,bool wr, uint32_t size,uint32_t mask, uint32_t *data, bool *error) { + virtual void dBusAccess(uint32_t addr,bool wr, uint32_t size, uint8_t *dataBytes, bool *error) { if(wr && addr == 0xF00FFF2C){ + uint32_t *data = (uint32_t*)dataBytes; out32 << hex << setw(8) << std::setfill('0') << *data << dec; if(++out32Counter % 4 == 0) out32 << "\n"; } - WorkspaceRegression::dBusAccess(addr,wr,size,mask,data,error); + WorkspaceRegression::dBusAccess(addr,wr,size,dataBytes,error); } virtual void checks(){ @@ -3437,41 +3402,43 @@ public: - virtual void dBusAccess(uint32_t addr,bool wr, uint32_t size,uint32_t mask, uint32_t *data, bool *error) { - if(isPerifRegion(addr)) switch(addr){ - //TODO Emulate peripherals here - case 0xFFFFFFE0: if(wr) fail(); else *data = mTime; break; - case 0xFFFFFFE4: if(wr) fail(); else *data = mTime >> 32; break; - case 0xFFFFFFE8: if(wr) mTimeCmp = (mTimeCmp & 0xFFFFFFFF00000000) | *data; else *data = mTimeCmp; break; - case 0xFFFFFFEC: if(wr) mTimeCmp = (mTimeCmp & 0x00000000FFFFFFFF) | (((uint64_t)*data) << 32); else *data = mTimeCmp >> 32; break; - case 0xFFFFFFF8: - if(wr){ - char c = (char)*data; - cout << c; - logTraces << c; 
- logTraces.flush(); - onStdout(c); - } else { - #ifdef WITH_USER_IO - if(stdinNonEmpty()){ - char c; - read(0, &c, 1); - *data = c; - } else - #endif - if(!customCin.empty()){ - *data = customCin.front(); - customCin.pop(); - } else { - *data = -1; - } - } - break; - case 0xFFFFFFFC: fail(); break; //Simulation end - default: cout << "Unmapped peripheral access : addr=0x" << hex << addr << " wr=" << wr << " mask=0x" << mask << " data=0x" << data << dec << endl; fail(); break; - } + virtual void dBusAccess(uint32_t addr,bool wr, uint32_t size,uint8_t *dataBytes, bool *error) { + uint32_t *data = (uint32_t*)dataBytes; - Workspace::dBusAccess(addr,wr,size,mask,data,error); + if(isPerifRegion(addr)) { + switch(addr){ + case 0xFFFFFFE0: if(wr) fail(); else *data = mTime; break; + case 0xFFFFFFE4: if(wr) fail(); else *data = mTime >> 32; break; + case 0xFFFFFFE8: if(wr) mTimeCmp = (mTimeCmp & 0xFFFFFFFF00000000) | *data; else *data = mTimeCmp; break; + case 0xFFFFFFEC: if(wr) mTimeCmp = (mTimeCmp & 0x00000000FFFFFFFF) | (((uint64_t)*data) << 32); else *data = mTimeCmp >> 32; break; + case 0xFFFFFFF8: + if(wr){ + char c = (char)*data; + cout << c; + logTraces << c; + logTraces.flush(); + onStdout(c); + } else { + #ifdef WITH_USER_IO + if(stdinNonEmpty()){ + char c; + read(0, &c, 1); + *data = c; + } else + #endif + if(!customCin.empty()){ + *data = customCin.front(); + customCin.pop(); + } else { + *data = -1; + } + } + break; + case 0xFFFFFFFC: fail(); break; //Simulation end + default: cout << "Unmapped peripheral access : addr=0x" << hex << addr << " wr=" << wr << " mask=0x" << " data=0x" << data << dec << endl; fail(); break; + } + } + Workspace::dBusAccess(addr,wr,size,dataBytes,error); } virtual void onStdout(char c){ @@ -3541,9 +3508,9 @@ public: - virtual void dBusAccess(uint32_t addr,bool wr, uint32_t size,uint32_t mask, uint32_t *data, bool *error) { + virtual void dBusAccess(uint32_t addr,bool wr, uint32_t size,uint64_t mask, uint8_t *dataBytes, bool *error) { 
+ uint32_t *data = (uint32_t*)dataBytes; if(isPerifRegion(addr)) switch(addr){ - //TODO Emulate peripherals here case 0xF0010000: if(wr && *data != 0) fail(); else *data = 0; break; case 0xF001BFF8: if(wr) fail(); else *data = mTime; break; case 0xF001BFFC: if(wr) fail(); else *data = mTime >> 32; break; @@ -3576,8 +3543,7 @@ public: break; default: cout << "Unmapped peripheral access : addr=0x" << hex << addr << " wr=" << wr << " mask=0x" << mask << " data=0x" << data << dec << endl; fail(); break; } - - Workspace::dBusAccess(addr,wr,size,mask,data,error); + Workspace::dBusAccess(addr,wr,size,mask,data,error); } virtual void onStdout(char c){ @@ -3891,7 +3857,7 @@ int main(int argc, char **argv, char **env) { redo(REDO,RiscvTest(name).bootAt(0x80000188u)->writeWord(0x80000184u, 0x00305073)->run();) } #endif - return 0; + //return 0; //#ifdef LITEX // LitexSoC("linux") @@ -4064,11 +4030,6 @@ int main(int argc, char **argv, char **env) { redo(REDO,RiscvTest(name).run();) } - #ifdef RVF - for(const string &name : riscvTestFloat){ - redo(REDO,RiscvTest(name).run();) - } - #endif #ifdef MUL for(const string &name : riscvTestMul){ diff --git a/src/test/cpp/regression/makefile b/src/test/cpp/regression/makefile index f63c812..74f85f5 100644 --- a/src/test/cpp/regression/makefile +++ b/src/test/cpp/regression/makefile @@ -5,7 +5,8 @@ IBUS?=CACHED IBUS_TC?=no IBUS_DATA_WIDTH?=32 DBUS?=CACHED -DBUS_DATA_WIDTH?=32 +DBUS_LOAD_DATA_WIDTH?=32 +DBUS_STORE_DATA_WIDTH?=32 TRACE?=no TRACE_ACCESS?=no TRACE_START=0 @@ -50,7 +51,8 @@ WITH_USER_IO?=no ADDCFLAGS += -CFLAGS -DREGRESSION_PATH='\"$(REGRESSION_PATH)/\"' ADDCFLAGS += -CFLAGS -DIBUS_${IBUS} ADDCFLAGS += -CFLAGS -DIBUS_DATA_WIDTH=${IBUS_DATA_WIDTH} -ADDCFLAGS += -CFLAGS -DDBUS_DATA_WIDTH=${DBUS_DATA_WIDTH} +ADDCFLAGS += -CFLAGS -DDBUS_LOAD_DATA_WIDTH=${DBUS_LOAD_DATA_WIDTH} +ADDCFLAGS += -CFLAGS -DDBUS_STORE_DATA_WIDTH=${DBUS_STORE_DATA_WIDTH} ADDCFLAGS += -CFLAGS -DDBUS_${DBUS} ADDCFLAGS += -CFLAGS -DREDO=${REDO} @@ -197,9 
+199,8 @@ ifneq ($(SEED),no) ADDCFLAGS += -CFLAGS -DSEED=${SEED} endif - ifeq ($(TRACE),yes) - VERILATOR_ARGS += --trace + VERILATOR_ARGS += --trace-fst ADDCFLAGS += -CFLAGS -DTRACE endif diff --git a/src/test/scala/vexriscv/DhrystoneBench.scala b/src/test/scala/vexriscv/DhrystoneBench.scala index a99b5d8..a98377c 100644 --- a/src/test/scala/vexriscv/DhrystoneBench.scala +++ b/src/test/scala/vexriscv/DhrystoneBench.scala @@ -50,100 +50,100 @@ class DhrystoneBench extends FunSuite { } -// for(withMemoryStage <- List(false, true)){ -// val stages = if(withMemoryStage) "Three" else "Two" -// getDmips( -// name = s"Gen${stages}StageArty", -// gen = SpinalVerilog(GenTwoThreeStage.cpu( -// withMulDiv = false, -// bypass = false, -// barrielShifter = false, -// withMemoryStage = withMemoryStage -// )), -// testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes" -// ) -// getDmips( -// name = s"Gen${stages}StageBarrielArty", -// gen = SpinalVerilog(GenTwoThreeStage.cpu( -// withMulDiv = false, -// bypass = true, -// barrielShifter = true, -// withMemoryStage = withMemoryStage -// )), -// testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes" -// ) -// getDmips( -// name = s"Gen${stages}StageMDArty", -// gen = SpinalVerilog(GenTwoThreeStage.cpu( -// withMulDiv = true, -// bypass = false, -// barrielShifter = false, -// withMemoryStage = withMemoryStage -// )), -// testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=yes DIV=yes COREMARK=yes" -// ) -// getDmips( -// name = s"Gen${stages}StageMDBarrielArty", -// gen = SpinalVerilog(GenTwoThreeStage.cpu( -// withMulDiv = true, -// bypass = true, -// barrielShifter = true, -// withMemoryStage = withMemoryStage -// )), -// testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=yes DIV=yes COREMARK=yes" -// ) -// } -// -// getDmips( -// 
name = "GenSmallestNoCsr", -// gen = GenSmallestNoCsr.main(null), -// testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes" -// ) -// -// -// getDmips( -// name = "GenSmallest", -// gen = GenSmallest.main(null), -// testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes" -// ) -// -// -// getDmips( -// name = "GenSmallAndProductive", -// gen = GenSmallAndProductive.main(null), -// testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes" -// ) -// -// getDmips( -// name = "GenSmallAndProductiveWithICache", -// gen = GenSmallAndProductiveICache.main(null), -// testCmd = "make clean run REDO=10 IBUS=CACHED DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes" -// ) -// -// -// getDmips( -// name = "GenFullNoMmuNoCache", -// gen = GenFullNoMmuNoCache.main(null), -// testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no COREMARK=yes" -// ) -// -// getDmips( -// name = "GenNoCacheNoMmuMaxPerf", -// gen = GenNoCacheNoMmuMaxPerf.main(null), -// testCmd = "make clean run REDO=10 MMU=no CSR=no DBUS=SIMPLE IBUS=SIMPLE COREMARK=yes" -// ) -// -// -// getDmips( -// name = "GenFullNoMmuMaxPerf", -// gen = GenFullNoMmuMaxPerf.main(null), -// testCmd = "make clean run REDO=10 MMU=no CSR=no COREMARK=yes" -// ) -// getDmips( -// name = "GenFullNoMmu", -// gen = GenFullNoMmu.main(null), -// testCmd = "make clean run REDO=10 MMU=no CSR=no COREMARK=yes" -// ) + for(withMemoryStage <- List(false, true)){ + val stages = if(withMemoryStage) "Three" else "Two" + getDmips( + name = s"Gen${stages}StageArty", + gen = SpinalVerilog(GenTwoThreeStage.cpu( + withMulDiv = false, + bypass = false, + barrielShifter = false, + withMemoryStage = withMemoryStage + )), + testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes" + ) + 
getDmips( + name = s"Gen${stages}StageBarrielArty", + gen = SpinalVerilog(GenTwoThreeStage.cpu( + withMulDiv = false, + bypass = true, + barrielShifter = true, + withMemoryStage = withMemoryStage + )), + testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes" + ) + getDmips( + name = s"Gen${stages}StageMDArty", + gen = SpinalVerilog(GenTwoThreeStage.cpu( + withMulDiv = true, + bypass = false, + barrielShifter = false, + withMemoryStage = withMemoryStage + )), + testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=yes DIV=yes COREMARK=yes" + ) + getDmips( + name = s"Gen${stages}StageMDBarrielArty", + gen = SpinalVerilog(GenTwoThreeStage.cpu( + withMulDiv = true, + bypass = true, + barrielShifter = true, + withMemoryStage = withMemoryStage + )), + testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=yes DIV=yes COREMARK=yes" + ) + } + + getDmips( + name = "GenSmallestNoCsr", + gen = GenSmallestNoCsr.main(null), + testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes" + ) + + + getDmips( + name = "GenSmallest", + gen = GenSmallest.main(null), + testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes" + ) + + + getDmips( + name = "GenSmallAndProductive", + gen = GenSmallAndProductive.main(null), + testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes" + ) + + getDmips( + name = "GenSmallAndProductiveWithICache", + gen = GenSmallAndProductiveICache.main(null), + testCmd = "make clean run REDO=10 IBUS=CACHED DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes" + ) + + + getDmips( + name = "GenFullNoMmuNoCache", + gen = GenFullNoMmuNoCache.main(null), + testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no COREMARK=yes" + ) 
+ + getDmips( + name = "GenNoCacheNoMmuMaxPerf", + gen = GenNoCacheNoMmuMaxPerf.main(null), + testCmd = "make clean run REDO=10 MMU=no CSR=no DBUS=SIMPLE IBUS=SIMPLE COREMARK=yes" + ) + + + getDmips( + name = "GenFullNoMmuMaxPerf", + gen = GenFullNoMmuMaxPerf.main(null), + testCmd = "make clean run REDO=10 MMU=no CSR=no COREMARK=yes" + ) + getDmips( + name = "GenFullNoMmu", + gen = GenFullNoMmu.main(null), + testCmd = "make clean run REDO=10 MMU=no CSR=no COREMARK=yes" + ) getDmips( name = "GenFull", @@ -151,11 +151,11 @@ class DhrystoneBench extends FunSuite { testCmd = "make clean run REDO=10 CSR=no MMU=no COREMARK=yes" ) -// getDmips( -// name = "GenLinuxBalenced", -// gen = LinuxGen.main(Array.fill[String](0)("")), -// testCmd = "make clean run IBUS=CACHED DBUS=CACHED DEBUG_PLUGIN=STD DHRYSTONE=yes SUPERVISOR=yes MMU=no CSR=yes CSR_SKIP_TEST=yes COMPRESSED=no MUL=yes DIV=yes LRSC=yes AMO=yes REDO=10 TRACE=no COREMARK=yes LINUX_REGRESSION=no" -// ) + getDmips( + name = "GenLinuxBalenced", + gen = LinuxGen.main(Array.fill[String](0)("")), + testCmd = "make clean run IBUS=CACHED DBUS=CACHED DEBUG_PLUGIN=STD DHRYSTONE=yes SUPERVISOR=yes MMU=no CSR=yes CSR_SKIP_TEST=yes COMPRESSED=no MUL=yes DIV=yes LRSC=yes AMO=yes REDO=10 TRACE=no COREMARK=yes LINUX_REGRESSION=no" + ) test("final_report") { diff --git a/src/test/scala/vexriscv/TestIndividualFeatures.scala b/src/test/scala/vexriscv/TestIndividualFeatures.scala index 0cf5ce6..5e015ac 100644 --- a/src/test/scala/vexriscv/TestIndividualFeatures.scala +++ b/src/test/scala/vexriscv/TestIndividualFeatures.scala @@ -438,6 +438,8 @@ class DBusDimension extends VexRiscvDimension("DBus") { val twoStageMmu = r.nextBoolean() && !noMemory && !noWriteBack val mmuConfig = if(universes.contains(VexRiscvUniverse.MMU)) MmuPortConfig(portTlbSize = 4, latency = if(twoStageMmu) 1 else 0, earlyRequireMmuLockup = Random.nextBoolean() && twoStageMmu, earlyCacheHits = Random.nextBoolean() && twoStageMmu) else null val memDataWidth = 
List(32,64,128)(r.nextInt(3)) + val cpuDataWidthChoices = List(32,64,128).filter(_ <= memDataWidth) + val cpuDataWidth = cpuDataWidthChoices(r.nextInt(cpuDataWidthChoices.size)) val bytePerLine = Math.max(memDataWidth/8, List(8,16,32,64)(r.nextInt(4))) var cacheSize = 0 var wayCount = 0 @@ -455,8 +457,8 @@ class DBusDimension extends VexRiscvDimension("DBus") { cacheSize = 512 << r.nextInt(5) wayCount = 1 << r.nextInt(3) }while(cacheSize/wayCount < 512 || (catchAll && cacheSize/wayCount > 4096)) - new VexRiscvPosition(s"Cached${memDataWidth}d" + "S" + cacheSize + "W" + wayCount + "BPL" + bytePerLine + (if(dBusCmdMasterPipe) "Cmp " else "") + (if(dBusCmdSlavePipe) "Csp " else "") + (if(dBusRspSlavePipe) "Rsp " else "") + (if(relaxedMemoryTranslationRegister) "Rmtr " else "") + (if(earlyWaysHits) "Ewh " else "") + (if(withAmo) "Amo " else "") + (if(withSmp) "Smp " else "") + (if(directTlbHit) "Dtlb " else "") + (if(twoStageMmu) "Tsmmu " else "") + (if(asyncTagMemory) "Atm" else "")) { - override def testParam = s"DBUS=CACHED DBUS_DATA_WIDTH=$memDataWidth " + (if(withLrSc) "LRSC=yes " else "") + (if(withAmo) "AMO=yes " else "") + (if(withSmp) "DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes " else "") + new VexRiscvPosition(s"Cached${memDataWidth}d${cpuDataWidth}c" + "S" + cacheSize + "W" + wayCount + "BPL" + bytePerLine + (if(dBusCmdMasterPipe) "Cmp " else "") + (if(dBusCmdSlavePipe) "Csp " else "") + (if(dBusRspSlavePipe) "Rsp " else "") + (if(relaxedMemoryTranslationRegister) "Rmtr " else "") + (if(earlyWaysHits) "Ewh " else "") + (if(withAmo) "Amo " else "") + (if(withSmp) "Smp " else "") + (if(directTlbHit) "Dtlb " else "") + (if(twoStageMmu) "Tsmmu " else "") + (if(asyncTagMemory) "Atm" else "")) { + override def testParam = s"DBUS=CACHED DBUS_LOAD_DATA_WIDTH=$memDataWidth DBUS_STORE_DATA_WIDTH=$cpuDataWidth " + (if(withLrSc) "LRSC=yes " else "") + (if(withAmo) "AMO=yes " else "") + (if(withSmp) "DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes " else "") override def 
applyOn(config: VexRiscvConfig): Unit = { config.plugins += new DBusCachedPlugin( @@ -465,7 +467,8 @@ class DBusDimension extends VexRiscvDimension("DBus") { bytePerLine = bytePerLine, wayCount = wayCount, addressWidth = 32, - cpuDataWidth = 32, + rfDataWidth = 32, + cpuDataWidth = cpuDataWidth, //Not tested memDataWidth = memDataWidth, catchAccessError = catchAll, catchIllegal = catchAll, diff --git a/src/test/scala/vexriscv/ip/fpu/FpuTest.scala b/src/test/scala/vexriscv/ip/fpu/FpuTest.scala index 43265f8..112252f 100644 --- a/src/test/scala/vexriscv/ip/fpu/FpuTest.scala +++ b/src/test/scala/vexriscv/ip/fpu/FpuTest.scala @@ -19,7 +19,7 @@ import scala.collection.mutable.ArrayBuffer import scala.sys.process.ProcessLogger import scala.util.Random - +//TODO Warning DataCache write aggregation will disable itself class FpuTest extends FunSuite{ val b2f = lang.Float.intBitsToFloat(_) @@ -55,7 +55,7 @@ class FpuTest extends FunSuite{ } def testP(p : FpuParameter){ - val portCount = 4 + val portCount = 1 val config = SimConfig config.allOptimisation @@ -978,32 +978,35 @@ class FpuTest extends FunSuite{ def testSgnjRaw(a : Float, b : Float): Unit ={ - val ref = b2f((f2b(a) & ~0x80000000) | f2b(b) & 0x80000000) + var ref = b2f((f2b(a) & ~0x80000000) | f2b(b) & 0x80000000) + if(a.isNaN) ref = a testBinaryOp(sgnj,a,b,ref,0, null,"sgnj") } def testSgnjnRaw(a : Float, b : Float): Unit ={ - val ref = b2f((f2b(a) & ~0x80000000) | ((f2b(b) & 0x80000000) ^ 0x80000000)) + var ref = b2f((f2b(a) & ~0x80000000) | ((f2b(b) & 0x80000000) ^ 0x80000000)) + if(a.isNaN) ref = a testBinaryOp(sgnjn,a,b,ref,0, null,"sgnjn") } def testSgnjxRaw(a : Float, b : Float): Unit ={ - val ref = b2f(f2b(a) ^ (f2b(b) & 0x80000000)) + var ref = b2f(f2b(a) ^ (f2b(b) & 0x80000000)) + if(a.isNaN) ref = a testBinaryOp(sgnjx,a,b,ref,0, null,"sgnjx") } val f64SignMask = 1l << 63 def testSgnjF64Raw(a : Double, b : Double): Unit ={ var ref = b2d((d2b(a).toLong & ~f64SignMask) | d2b(b).toLong & f64SignMask) - 
if(d2b(a).toLong >> 32 == -1) ref = a + if(a.isNaN) ref = a testBinaryOpF64(sgnj,a,b,ref,0, null,"sgnj") } def testSgnjnF64Raw(a : Double, b : Double): Unit ={ var ref = b2d((d2b(a).toLong & ~f64SignMask) | ((d2b(b).toLong & f64SignMask) ^ f64SignMask)) - if(d2b(a).toLong >> 32 == -1) ref = a + if(a.isNaN) ref = a testBinaryOpF64(sgnjn,a,b,ref,0, null,"sgnjn") } def testSgnjxF64Raw(a : Double, b : Double): Unit ={ var ref = b2d(d2b(a).toLong ^ (d2b(b).toLong & f64SignMask)) - if(d2b(a).toLong >> 32 == -1) ref = a + if(a.isNaN) ref = a testBinaryOpF64(sgnjx,a,b,ref,0, null,"sgnjx") } @@ -1277,6 +1280,17 @@ class FpuTest extends FunSuite{ //TODO test boxing //TODO double <-> simple convertions if(p.withDouble) { + + for(_ <- 0 until 10000) testSgnjF64() + println("f64 sgnj done") + + for(_ <- 0 until 10000) testSgnjF32() + println("f32 sgnj done") + + //380000000001ffef 5fffffffffff9ff 8000000000100000 +// testBinaryOpF64(mul,-5.877471754282472E-39, 8.814425663400984E-280, -5.180654E-318 ,1, FpuRoundMode.RMM,"mul") +// 5.877471754282472E-39 8.814425663400984E-280 -5.180654E-318 RMM + for(_ <- 0 until 10000) testCvtF64F32() // 1 did not equal 3 Flag missmatch dut=1 ref=3 testCvtF64F32Raw 1.1754942807573643E-38 1.17549435E-38 RMM println("FCVT_D_S done") for(_ <- 0 until 10000) testCvtF32F64() @@ -1288,8 +1302,6 @@ class FpuTest extends FunSuite{ println("f64 f2ui done") - for(_ <- 0 until 10000) testSgnjF64() - println("f64 sgnj done") @@ -1481,7 +1493,7 @@ class FpuTest extends FunSuite{ // dut.clockDomain.waitSampling(1000) // simSuccess() - for(i <- 0 until 10000) fxxTests.randomPick()() + for(i <- 0 until 100000) fxxTests.randomPick()() waitUntil(cpu.rspQueue.isEmpty) }