fpu double fixes

DataCache now supports wide load/store
This commit is contained in:
Dolu1990 2021-02-16 15:38:51 +01:00
parent 7d3b35c32c
commit f180ba2fc9
11 changed files with 434 additions and 521 deletions

View File

@ -32,189 +32,83 @@ import vexriscv.ip.fpu.FpuParameter
//make clean all SEED=42 MMU=no STOP_ON_ERROR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes SUPERVISOR=yes REDO=1 DHRYSTONE=yes LRSC=yes AMO=yes TRACE=yes TRACE_START=1000000000 FLOW_INFO=ye IBUS_DATA_WIDTH=128 DBUS_DATA_WIDTH=128 LINUX_SOC_SMP=yes VMLINUX=../../../../../buildroot/output/images/Image RAMDISK=../../../../../buildroot/output/images/rootfs.cpio DTB=../../../../../buildroot/output/images/dtb EMULATOR=../../../../../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_jump.bin
object TestsWorkspace {
def main(args: Array[String]) {
// def configFull = {
// val config = VexRiscvConfig(
// plugins = List(
// new MmuPlugin(
// ioRange = x => x(31 downto 28) === 0xF
// ),
// //Uncomment the whole IBusSimplePlugin and comment IBusCachedPlugin if you want uncached iBus config
// // new IBusSimplePlugin(
// // resetVector = 0x80000000l,
// // cmdForkOnSecondStage = false,
// // cmdForkPersistence = false,
// // prediction = DYNAMIC_TARGET,
// // historyRamSizeLog2 = 10,
// // catchAccessFault = true,
// // compressedGen = true,
// // busLatencyMin = 1,
// // injectorStage = true,
// // memoryTranslatorPortConfig = withMmu generate MmuPortConfig(
// // portTlbSize = 4
// // )
// // ),
//
// //Uncomment the whole IBusCachedPlugin and comment IBusSimplePlugin if you want cached iBus config
// new IBusCachedPlugin(
// resetVector = 0x80000000l,
// compressedGen = false,
// prediction = STATIC,
// injectorStage = false,
// config = InstructionCacheConfig(
// cacheSize = 4096*2,
// bytePerLine = 64,
// wayCount = 2,
// addressWidth = 32,
// cpuDataWidth = 32,
// memDataWidth = 128,
// catchIllegalAccess = true,
// catchAccessFault = true,
// asyncTagMemory = false,
// twoCycleRam = true,
// twoCycleCache = true,
// reducedBankWidth = true
// // )
// ),
// memoryTranslatorPortConfig = MmuPortConfig(
// portTlbSize = 4,
// latency = 1,
// earlyRequireMmuLockup = true,
// earlyCacheHits = true
// )
// ),
// // ).newTightlyCoupledPort(TightlyCoupledPortParameter("iBusTc", a => a(30 downto 28) === 0x0 && a(5))),
// // new DBusSimplePlugin(
// // catchAddressMisaligned = true,
// // catchAccessFault = true,
// // earlyInjection = false,
// // withLrSc = true,
// // memoryTranslatorPortConfig = withMmu generate MmuPortConfig(
// // portTlbSize = 4
// // )
// // ),
// new DBusCachedPlugin(
// dBusCmdMasterPipe = true,
// dBusCmdSlavePipe = true,
// dBusRspSlavePipe = true,
// config = new DataCacheConfig(
// cacheSize = 4096*1,
// bytePerLine = 64,
// wayCount = 1,
// addressWidth = 32,
// cpuDataWidth = 32,
// memDataWidth = 128,
// catchAccessError = true,
// catchIllegal = true,
// catchUnaligned = true,
// withLrSc = true,
// withAmo = true,
// withExclusive = true,
// withInvalidate = true,
// pendingMax = 32
// // )
// ),
// memoryTranslatorPortConfig = MmuPortConfig(
// portTlbSize = 4,
// latency = 1,
// earlyRequireMmuLockup = true,
// earlyCacheHits = true
// )
// ),
//
// // new MemoryTranslatorPlugin(
// // tlbSize = 32,
// // virtualRange = _(31 downto 28) === 0xC,
// // ioRange = _(31 downto 28) === 0xF
// // ),
//
// new DecoderSimplePlugin(
// catchIllegalInstruction = true
// ),
// new RegFilePlugin(
// regFileReadyKind = plugin.ASYNC,
// zeroBoot = true
// ),
// new IntAluPlugin,
// new SrcPlugin(
// separatedAddSub = false
// ),
// new FullBarrelShifterPlugin(earlyInjection = false),
// // new LightShifterPlugin,
// new HazardSimplePlugin(
// bypassExecute = true,
// bypassMemory = true,
// bypassWriteBack = true,
// bypassWriteBackBuffer = true,
// pessimisticUseSrc = false,
// pessimisticWriteRegFile = false,
// pessimisticAddressMatch = false
// ),
// // new HazardSimplePlugin(false, true, false, true),
// // new HazardSimplePlugin(false, false, false, false),
// new MulPlugin,
// new MulDivIterativePlugin(
// genMul = false,
// genDiv = true,
// mulUnrollFactor = 32,
// divUnrollFactor = 1
// ),
// // new DivPlugin,
// new CsrPlugin(CsrPluginConfig.all2(0x80000020l).copy(ebreakGen = false, misaExtensionsInit = Riscv.misaToInt("imas"))),
// // new CsrPlugin(//CsrPluginConfig.all2(0x80000020l).copy(ebreakGen = true)/*
// // CsrPluginConfig(
// // catchIllegalAccess = false,
// // mvendorid = null,
// // marchid = null,
// // mimpid = null,
// // mhartid = null,
// // misaExtensionsInit = 0,
// // misaAccess = CsrAccess.READ_ONLY,
// // mtvecAccess = CsrAccess.WRITE_ONLY,
// // mtvecInit = 0x80000020l,
// // mepcAccess = CsrAccess.READ_WRITE,
// // mscratchGen = true,
// // mcauseAccess = CsrAccess.READ_ONLY,
// // mbadaddrAccess = CsrAccess.READ_ONLY,
// // mcycleAccess = CsrAccess.NONE,
// // minstretAccess = CsrAccess.NONE,
// // ecallGen = true,
// // ebreakGen = true,
// // wfiGenAsWait = false,
// // wfiGenAsNop = true,
// // ucycleAccess = CsrAccess.NONE
// // )),
// new DebugPlugin(ClockDomain.current.clone(reset = Bool().setName("debugReset"))),
// new BranchPlugin(
// earlyBranch = false,
// catchAddressMisaligned = true,
// fenceiGenAsAJump = false
// ),
// new YamlPlugin("cpu0.yaml")
// )
// )
// config
// }
// import spinal.core.sim._
// SimConfig.withConfig(SpinalConfig(mergeAsyncProcess = false, anonymSignalPrefix = "zz_")).allOptimisation.compile(new VexRiscv(configFull)).doSimUntilVoid{ dut =>
// dut.clockDomain.forkStimulus(10)
// dut.clockDomain.forkSimSpeedPrinter(4)
// var iBus : InstructionCacheMemBus = null
//
// dut.plugins.foreach{
// case plugin: IBusCachedPlugin => iBus = plugin.iBus
// case _ =>
// }
// dut.clockDomain.onSamplings{
//// iBus.cmd.ready.randomize()
// iBus.rsp.data #= 0x13
// }
// }
SpinalConfig().generateVerilog {
// make clean run REDO=10 CSR=no MMU=no COREMARK=no RVF=yes REDO=1 TRACE=yes
val config = GenFull.config
// make clean all REDO=10 CSR=no MMU=no COREMARK=no RVF=no REDO=1 DBUS_LOAD_DATA_WIDTH=64 DBUS_STORE_DATA_WIDTH=64 DEBUG=ye TRACE=ye
val config = VexRiscvConfig(
plugins = List(
new IBusCachedPlugin(
prediction = DYNAMIC,
config = InstructionCacheConfig(
cacheSize = 4096,
bytePerLine =32,
wayCount = 1,
addressWidth = 32,
cpuDataWidth = 32,
memDataWidth = 32,
catchIllegalAccess = true,
catchAccessFault = true,
asyncTagMemory = false,
twoCycleRam = true,
twoCycleCache = true
),
memoryTranslatorPortConfig = MmuPortConfig(
portTlbSize = 4
)
),
new DBusCachedPlugin(
config = new DataCacheConfig(
cacheSize = 4096,
bytePerLine = 32,
wayCount = 1,
addressWidth = 32,
rfDataWidth = 32,
cpuDataWidth = 64,
memDataWidth = 64,
catchAccessError = true,
catchIllegal = true,
catchUnaligned = true
),
memoryTranslatorPortConfig = MmuPortConfig(
portTlbSize = 6
)
),
new MmuPlugin(
virtualRange = _(31 downto 28) === 0xC,
ioRange = _(31 downto 28) === 0xF
),
new DecoderSimplePlugin(
catchIllegalInstruction = true
),
new RegFilePlugin(
regFileReadyKind = plugin.SYNC,
zeroBoot = false
),
new IntAluPlugin,
new SrcPlugin(
separatedAddSub = false,
executeInsertion = true
),
new FullBarrelShifterPlugin,
new HazardSimplePlugin(
bypassExecute = true,
bypassMemory = true,
bypassWriteBack = true,
bypassWriteBackBuffer = true,
pessimisticUseSrc = false,
pessimisticWriteRegFile = false,
pessimisticAddressMatch = false
),
new MulPlugin,
new DivPlugin,
new CsrPlugin(CsrPluginConfig.small(0x80000020l)),
new DebugPlugin(ClockDomain.current.clone(reset = Bool().setName("debugReset"))),
new BranchPlugin(
earlyBranch = false,
catchAddressMisaligned = true
),
new YamlPlugin("cpu0.yaml")
)
)
config.plugins += new FpuPlugin(
externalFpu = false,
p = FpuParameter(

View File

@ -46,7 +46,6 @@ case class VexRiscvConfig(){
object LEGAL_INSTRUCTION extends Stageable(Bool)
object REGFILE_WRITE_VALID extends Stageable(Bool)
object REGFILE_WRITE_DATA extends Stageable(Bits(32 bits))
object DBUS_DATA extends Stageable(Bits(32 bits))
object MPP extends PipelineThing[UInt]
object DEBUG_BYPASS_CACHE extends PipelineThing[Bool]

View File

@ -16,6 +16,7 @@ case class DataCacheConfig(cacheSize : Int,
wayCount : Int,
addressWidth : Int,
cpuDataWidth : Int,
var rfDataWidth : Int = -1, //-1 mean cpuDataWidth
memDataWidth : Int,
catchAccessError : Boolean,
catchIllegal : Boolean,
@ -31,10 +32,17 @@ case class DataCacheConfig(cacheSize : Int,
directTlbHit : Boolean = false,
mergeExecuteMemory : Boolean = false,
asyncTagMemory : Boolean = false,
aggregationWidth : Int = 0){
withWriteAggregation : Boolean = false){
if(rfDataWidth == -1) rfDataWidth = cpuDataWidth
assert(!(mergeExecuteMemory && (earlyDataMux || earlyWaysHits)))
assert(!(earlyDataMux && !earlyWaysHits))
assert(isPow2(pendingMax))
assert(rfDataWidth <= memDataWidth)
def sizeMax = log2Up(bytePerLine)
def sizeWidth = log2Up(sizeMax + 1)
val aggregationWidth = if(withWriteAggregation) log2Up(memDataBytes+1) else 0
def withWriteResponse = withExclusive
def burstSize = bytePerLine*8/memDataWidth
val burstLength = bytePerLine/(cpuDataWidth/8)
@ -44,6 +52,7 @@ case class DataCacheConfig(cacheSize : Int,
def withExternalLrSc = withLrSc && withExclusive
def withExternalAmo = withAmo && withExclusive
def cpuDataBytes = cpuDataWidth/8
def rfDataBytes = rfDataWidth/8
def memDataBytes = memDataWidth/8
def getAxi4SharedConfig() = Axi4Config(
addressWidth = addressWidth,
@ -55,6 +64,7 @@ case class DataCacheConfig(cacheSize : Int,
useQos = false
)
def getAvalonConfig() = AvalonMMConfig.bursted(
addressWidth = addressWidth,
dataWidth = memDataWidth,
@ -87,7 +97,7 @@ case class DataCacheConfig(cacheSize : Int,
dataWidth = memDataWidth
).addSources(1, BmbSourceParameter(
lengthWidth = log2Up(this.bytePerLine),
contextWidth = (if(!withWriteResponse) 1 else 0) + (if(cpuDataWidth != memDataWidth) log2Up(memDataBytes) else 0),
contextWidth = (if(!withWriteResponse) 1 else 0) + aggregationWidth,
alignment = BmbParameter.BurstAlignement.LENGTH,
canExclusive = withExclusive,
withCachedRead = true
@ -120,7 +130,7 @@ case class DataCacheCpuExecute(p : DataCacheConfig) extends Bundle with IMasterS
case class DataCacheCpuExecuteArgs(p : DataCacheConfig) extends Bundle{
val wr = Bool
val size = UInt(2 bits)
val size = UInt(log2Up(log2Up(p.cpuDataBytes)+1) bits)
val isLrsc = p.withLrSc generate Bool()
val isAmo = p.withAmo generate Bool()
val amoCtrl = p.withAmo generate new Bundle {
@ -174,10 +184,11 @@ case class DataCacheCpuWriteBack(p : DataCacheConfig) extends Bundle with IMaste
val mmuException, unalignedAccess, accessError = Bool()
val keepMemRspData = Bool() //Used by external AMO to avoid having an internal buffer
val fence = FenceFlags()
val exclusiveOk = Bool()
override def asMaster(): Unit = {
out(isValid,isStuck,isUser, address, fence, storeData)
in(haltIt, data, mmuException, unalignedAccess, accessError, isWrite, keepMemRspData)
in(haltIt, data, mmuException, unalignedAccess, accessError, isWrite, keepMemRspData, exclusiveOk)
}
}
@ -205,9 +216,18 @@ case class DataCacheMemCmd(p : DataCacheConfig) extends Bundle{
val address = UInt(p.addressWidth bit)
val data = Bits(p.cpuDataWidth bits)
val mask = Bits(p.cpuDataWidth/8 bits)
val length = UInt(log2Up(p.burstLength) bits)
val size = UInt(p.sizeWidth bits) //... 1 => 2 bytes ... 2 => 4 bytes ...
val exclusive = p.withExclusive generate Bool()
val last = Bool
// def beatCountMinusOne = size.muxListDc((0 to p.sizeMax).map(i => i -> U((1 << i)/p.memDataBytes)))
// def beatCount = size.muxListDc((0 to p.sizeMax).map(i => i -> U((1 << i)/p.memDataBytes-1)))
//Utilities which make quite a few assumptions about the bus utilisation
def byteCountMinusOne = size.muxListDc((0 to p.sizeMax).map(i => i -> U((1 << i)-1, log2Up(p.bytePerLine) bits)))
def beatCountMinusOne = (size === log2Up(p.bytePerLine)) ? U(p.burstSize-1) | U(0)
def beatCount = (size === log2Up(p.bytePerLine)) ? U(p.burstSize) | U(1)
def isBurst = size === log2Up(p.bytePerLine)
}
case class DataCacheMemRsp(p : DataCacheConfig) extends Bundle{
val aggregated = UInt(p.aggregationWidth bits)
@ -267,9 +287,9 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave
axi.sharedCmd.write := cmdStage.wr
axi.sharedCmd.prot := "010"
axi.sharedCmd.cache := "1111"
axi.sharedCmd.size := log2Up(p.memDataWidth/8)
axi.sharedCmd.size := cmd.size.max(log2Up(p.memDataBytes))
axi.sharedCmd.addr := cmdStage.address
axi.sharedCmd.len := cmdStage.length.resized
axi.sharedCmd.len := cmd.beatCountMinusOne.resized
axi.writeData.arbitrationFrom(dataStage)
axi.writeData.data := dataStage.data
@ -293,7 +313,7 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave
mm.read := cmd.valid && !cmd.wr
mm.write := cmd.valid && cmd.wr
mm.address := cmd.address(cmd.address.high downto log2Up(p.memDataWidth/8)) @@ U(0,log2Up(p.memDataWidth/8) bits)
mm.burstCount := cmd.length + U(1, widthOf(mm.burstCount) bits)
mm.burstCount := cmd.beatCount
mm.byteEnable := cmd.mask
mm.writeData := cmd.data
@ -302,23 +322,25 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave
rsp.data := mm.readData
rsp.error := mm.response =/= AvalonMM.Response.OKAY
assert(p.cpuDataWidth == p.rfDataWidth)
mm
}
def toWishbone(): Wishbone = {
assert(p.cpuDataWidth == p.rfDataWidth)
val wishboneConfig = p.getWishboneConfig()
val bus = Wishbone(wishboneConfig)
val counter = Reg(UInt(log2Up(p.burstSize) bits)) init(0)
val cmdBridge = Stream (DataCacheMemCmd(p))
val isBurst = cmdBridge.length =/= 0
val isBurst = cmdBridge.isBurst
cmdBridge.valid := cmd.valid
cmdBridge.address := (isBurst ? (cmd.address(31 downto widthOf(counter) + 2) @@ counter @@ U"00") | (cmd.address(31 downto 2) @@ U"00"))
cmdBridge.wr := cmd.wr
cmdBridge.mask := cmd.mask
cmdBridge.data := cmd.data
cmdBridge.length := cmd.length
cmdBridge.last := counter === cmd.length
cmdBridge.size := cmd.size
cmdBridge.last := !isBurst || counter === p.burstSize-1
cmd.ready := cmdBridge.ready && (cmdBridge.wr || cmdBridge.last)
@ -351,6 +373,7 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave
def toPipelinedMemoryBus(): PipelinedMemoryBus = {
val bus = PipelinedMemoryBus(32,32)
assert(p.cpuDataWidth == p.rfDataWidth)
val counter = Reg(UInt(log2Up(p.burstSize) bits)) init(0)
when(bus.cmd.fire){ counter := counter + 1 }
@ -361,7 +384,7 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave
bus.cmd.write := cmd.wr
bus.cmd.mask := cmd.mask
bus.cmd.data := cmd.data
cmd.ready := bus.cmd.ready && (cmd.wr || counter === cmd.length)
cmd.ready := bus.cmd.ready && (cmd.wr || counter === p.burstSize-1)
rsp.valid := bus.rsp.valid
rsp.data := bus.rsp.payload.data
rsp.error := False
@ -374,14 +397,16 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave
setCompositeName(DataCacheMemBus.this, "Bridge", true)
val pipelinedMemoryBusConfig = p.getBmbParameter()
val bus = Bmb(pipelinedMemoryBusConfig).setCompositeName(this,"toBmb", true)
val aggregationMax = p.memDataBytes
case class Context() extends Bundle{
val isWrite = !p.withWriteResponse generate Bool()
val rspCount = (p.cpuDataWidth != p.memDataWidth) generate UInt(log2Up(aggregationMax) bits)
val rspCount = (p.aggregationWidth != 0) generate UInt(p.aggregationWidth bits)
}
val withoutWriteBuffer = if(p.cpuDataWidth == p.memDataWidth) new Area {
def sizeToLength(size : UInt) = size.muxListDc((0 to log2Up(p.cpuDataBytes)).map(i => U(i) -> U((1 << i)-1, log2Up(p.cpuDataBytes) bits)))
val withoutWriteBuffer = if(p.aggregationWidth == 0) new Area {
val busCmdContext = Context()
bus.cmd.valid := cmd.valid
@ -389,7 +414,7 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave
bus.cmd.opcode := (cmd.wr ? B(Bmb.Cmd.Opcode.WRITE) | B(Bmb.Cmd.Opcode.READ))
bus.cmd.address := cmd.address.resized
bus.cmd.data := cmd.data
bus.cmd.length := (cmd.length << 2) | 3
bus.cmd.length := cmd.byteCountMinusOne
bus.cmd.mask := cmd.mask
if (p.withExclusive) bus.cmd.exclusive := cmd.exclusive
if (!p.withWriteResponse) busCmdContext.isWrite := cmd.wr
@ -399,7 +424,7 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave
if(p.withInvalidate) sync.arbitrationFrom(bus.sync)
}
val withWriteBuffer = if(p.cpuDataWidth != p.memDataWidth) new Area {
val withWriteBuffer = if(p.aggregationWidth != 0) new Area {
val buffer = new Area {
val stream = cmd.toEvent().m2sPipe()
val address = Reg(UInt(p.addressWidth bits))
@ -413,7 +438,7 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave
val aggregationRange = log2Up(p.memDataWidth/8)-1 downto log2Up(p.cpuDataWidth/8)
val tagRange = p.addressWidth-1 downto aggregationRange.high+1
val aggregationEnabled = Reg(Bool)
val aggregationCounter = Reg(UInt(log2Up(aggregationMax) bits)) init(0)
val aggregationCounter = Reg(UInt(p.aggregationWidth bits)) init(0)
val aggregationCounterFull = aggregationCounter === aggregationCounter.maxValue
val timer = Reg(UInt(log2Up(timeoutCycles)+1 bits)) init(0)
val timerFull = timer.msb
@ -467,7 +492,7 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave
when(cmd.fire){
buffer.write := cmd.wr
buffer.address := cmd.address.resized
buffer.length := (cmd.length << 2) | 3
buffer.length := cmd.byteCountMinusOne
if (p.withExclusive) buffer.exclusive := cmd.exclusive
when(cmd.wr && !cmd.uncached && !cmdExclusive){
@ -484,7 +509,7 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave
rsp.aggregated := rspCtx.rspCount
val syncLogic = p.withInvalidate generate new Area{
val cmdCtx = Stream(UInt(log2Up(aggregationMax) bits))
val cmdCtx = Stream(UInt(p.aggregationWidth bits))
cmdCtx.valid := bus.cmd.fire && bus.cmd.isWrite
cmdCtx.payload := aggregationCounter
halt setWhen(!cmdCtx.ready)
@ -563,6 +588,7 @@ class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParam
val memWordRange = log2Up(bytePerLine)-1 downto log2Up(bytePerMemWord)
val hitRange = tagRange.high downto lineRange.low
val memWordToCpuWordRange = log2Up(bytePerMemWord)-1 downto log2Up(bytePerWord)
val cpuWordToRfWordRange = log2Up(bytePerWord)-1 downto log2Up(p.rfDataBytes)
class LineInfo() extends Bundle{
@ -721,11 +747,15 @@ class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParam
val stage0 = new Area{
val mask = io.cpu.execute.size.mux (
U(0) -> B"0001",
U(1) -> B"0011",
default -> B"1111"
) |<< io.cpu.execute.address(1 downto 0)
// val mask = io.cpu.execute.size.mux (
// U(0) -> B"0001",
// U(1) -> B"0011",
// default -> B"1111"
// ) |<< io.cpu.execute.address(1 downto 0)
val mask = io.cpu.execute.size.muxListDc((0 to log2Up(p.cpuDataBytes)).map(i => U(i) -> B((1 << (1 << i)) -1, p.cpuDataBytes bits))) |<< io.cpu.execute.address(log2Up(p.cpuDataBytes)-1 downto 0)
val dataColisions = collisionProcess(io.cpu.execute.address(lineRange.high downto cpuWordRange.low), mask)
val wayInvalidate = B(0, wayCount bits) //Used if invalidate enabled
@ -792,7 +822,8 @@ class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParam
val wayInvalidate = stagePipe(stageA. wayInvalidate)
val consistancyHazard = if(stageA.consistancyCheck != null) stagePipe(stageA.consistancyCheck.hazard) else False
val dataColisions = stagePipe(stageA.dataColisions)
val unaligned = if(!catchUnaligned) False else stagePipe((stageA.request.size === 2 && io.cpu.memory.address(1 downto 0) =/= 0) || (stageA.request.size === 1 && io.cpu.memory.address(0 downto 0) =/= 0))
// val unaligned = if(!catchUnaligned) False else stagePipe((stageA.request.size === 2 && io.cpu.memory.address(1 downto 0) =/= 0) || (stageA.request.size === 1 && io.cpu.memory.address(0 downto 0) =/= 0))
val unaligned = if(!catchUnaligned) False else stagePipe((1 to log2Up(p.cpuDataBytes)).map(i => stageA.request.size === i && io.cpu.memory.address(i-1 downto 0) =/= 0).orR)
val waysHitsBeforeInvalidate = if(earlyWaysHits) stagePipe(B(stageA.wayHits)) else B(tagsReadRsp.map(tag => mmuRsp.physicalAddress(tagRange) === tag.address && tag.valid).asBits())
val waysHits = waysHitsBeforeInvalidate & ~wayInvalidate
val waysHit = waysHits.orR
@ -848,8 +879,9 @@ class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParam
val requestDataBypass = CombInit(io.cpu.writeBack.storeData)
import DataCacheExternalAmoStates._
val amo = withAmo generate new Area{
def rf = io.cpu.writeBack.storeData
def mem = if(withInternalAmo) dataMux else ioMemRspMuxed
def rf = io.cpu.writeBack.storeData(p.rfDataWidth-1 downto 0)
def memLarger = if(withInternalAmo) dataMux else ioMemRspMuxed
def mem = memLarger.subdivideIn(rfDataWidth bits).read(io.cpu.writeBack.address(cpuWordToRfWordRange))
val compare = request.amoCtrl.alu.msb
val unsigned = request.amoCtrl.alu(2 downto 1) === B"11"
val addSub = (rf.asSInt + Mux(compare, ~mem, mem).asSInt + Mux(compare, S(1), S(0))).asBits
@ -898,13 +930,13 @@ class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParam
io.mem.cmd.valid := False
io.mem.cmd.address := mmuRsp.physicalAddress(tagRange.high downto cpuWordRange.low) @@ U(0, cpuWordRange.low bits)
io.mem.cmd.length := 0
io.mem.cmd.address := mmuRsp.physicalAddress
io.mem.cmd.last := True
io.mem.cmd.wr := request.wr
io.mem.cmd.mask := mask
io.mem.cmd.data := requestDataBypass
io.mem.cmd.uncached := mmuRsp.isIoAccess
io.mem.cmd.size := request.size.resized
if(withExternalLrSc) io.mem.cmd.exclusive := request.isLrsc || isAmo
@ -962,8 +994,6 @@ class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParam
//Write through
io.mem.cmd.valid setWhen(request.wr)
io.mem.cmd.address := mmuRsp.physicalAddress(tagRange.high downto cpuWordRange.low) @@ U(0, cpuWordRange.low bits)
io.mem.cmd.length := 0
io.cpu.writeBack.haltIt clearWhen(!request.wr || io.mem.cmd.ready)
if(withInternalAmo) when(isAmo){
@ -989,8 +1019,8 @@ class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParam
//Emit cmd
io.mem.cmd.valid setWhen(!memCmdSent)
io.mem.cmd.wr := False
io.mem.cmd.address := mmuRsp.physicalAddress(tagRange.high downto lineRange.low) @@ U(0,lineRange.low bit)
io.mem.cmd.length := p.burstLength-1
io.mem.cmd.address(0, lineRange.low bits) := 0
io.mem.cmd.size := log2Up(p.bytePerLine)
loaderValid setWhen(io.mem.cmd.ready)
}
@ -1006,15 +1036,18 @@ class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParam
if(catchAccessError) io.cpu.writeBack.accessError := (waysHits & B(tagsReadRsp.map(_.error))) =/= 0 || (loadStoreFault && !mmuRsp.isPaging)
}
if(withLrSc) when(request.isLrsc && request.wr){
if(withLrSc) {
val success = if(withInternalLrSc)lrSc.reserved else io.mem.rsp.exclusive
io.cpu.writeBack.data := B(!success).resized
io.cpu.writeBack.exclusiveOk := success
when(request.isLrsc && request.wr){
// io.cpu.writeBack.data := B(!success).resized
if(withExternalLrSc) when(io.cpu.writeBack.isValid && io.mem.rsp.valid && rspSync && success && waysHit){
cpuWriteToCache := True
}
}
}
if(withAmo) when(request.isAmo){
requestDataBypass := amo.resultReg
requestDataBypass.subdivideIn(p.rfDataWidth bits).foreach(_ := amo.resultReg)
}
//remove side effects on exceptions

View File

@ -275,6 +275,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
output.rs1.setNanQuiet
output.rs1.sign := False
}
}
when(s1.format === FpuFormat.FLOAT =/= rs2Entry.boxed) {
output.rs2.setNanQuiet
output.rs2.sign := False
@ -284,7 +285,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
}
}
}
}
val decode = new Area{
val input = read.output.combStage()
@ -733,7 +733,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val sgnjRs1Sign = CombInit(input.rs1.sign)
val sgnjRs2Sign = CombInit(input.rs2.sign)
if(p.withDouble){
sgnjRs1Sign setWhen(input.rs1Boxed && input.format === FpuFormat.DOUBLE)
sgnjRs2Sign setWhen(input.rs2Boxed && input.format === FpuFormat.DOUBLE)
}
val sgnjResult = (sgnjRs1Sign && input.arg(1)) ^ sgnjRs2Sign ^ input.arg(0)
@ -786,23 +785,13 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
}
}
is(FpuOpcode.SGNJ){
when(!input.rs1.isNan) {
rfOutput.value.sign := sgnjResult
if(p.withDouble) when(input.format === FpuFormat.DOUBLE){
when(input.rs1Boxed){
}
if(p.withDouble) when(input.rs1Boxed && input.format === FpuFormat.DOUBLE){
rfOutput.value.sign := input.rs1.sign
rfOutput.format := FpuFormat.FLOAT
}
// //kill boxing => F32 -> F64 NAN
// when(input.rs1Boxed && !sgnjResult){
// rfOutput.value.setNan
// rfOutput.value.mantissa.setAll()
// rfOutput.value.mantissa(31 downto 0) := input.rs1.sign ## input.rs1.exponent
// }
// //Spawn boxing => F64 NAN -> F32
// when(!input.rs1Boxed && input.rs1.exponent === exponentOne + 1024 && input.rs1.mantissa(32, 52-32 bits).andR && sgnjResult){
//
// }
}
}
if(p.withDouble) is(FpuOpcode.FCVT_X_X){
rfOutput.format := ((input.format === FpuFormat.FLOAT) ? FpuFormat.DOUBLE | FpuFormat.FLOAT)

View File

@ -6,6 +6,8 @@ import spinal.core._
import spinal.lib._
import spinal.lib.bus.amba4.axi.Axi4
import scala.collection.mutable.ArrayBuffer
class DAxiCachedPlugin(config : DataCacheConfig, memoryTranslatorPortConfig : Any = null) extends DBusCachedPlugin(config, memoryTranslatorPortConfig) {
var dAxi : Axi4 = null
@ -22,6 +24,7 @@ trait DBusEncodingService {
def addLoadWordEncoding(key: MaskedLiteral): Unit
def addStoreWordEncoding(key: MaskedLiteral): Unit
def bypassStore(data : Bits) : Unit
def loadData() : Bits
}
class DBusCachedPlugin(val config : DataCacheConfig,
@ -90,10 +93,15 @@ class DBusCachedPlugin(val config : DataCacheConfig,
)
}
val bypassStoreList = ArrayBuffer[(Bool, Bits)]()
override def bypassStore(data: Bits): Unit = {
pipeline.stages.last.input(MEMORY_STORE_DATA) := data
bypassStoreList += ConditionalContext.isTrue() -> data
}
override def loadData(): Bits = pipeline.stages.last.output(MEMORY_LOAD_DATA)
object MEMORY_ENABLE extends Stageable(Bool)
object MEMORY_MANAGMENT extends Stageable(Bool)
object MEMORY_WR extends Stageable(Bool)
@ -104,7 +112,9 @@ class DBusCachedPlugin(val config : DataCacheConfig,
object MEMORY_FORCE_CONSTISTENCY extends Stageable(Bool)
object IS_DBUS_SHARING extends Stageable(Bool())
object MEMORY_VIRTUAL_ADDRESS extends Stageable(UInt(32 bits))
object MEMORY_STORE_DATA extends Stageable(Bits(32 bits))
object MEMORY_STORE_DATA_RF extends Stageable(Bits(config.rfDataWidth bits))
// object MEMORY_STORE_DATA_CPU extends Stageable(Bits(config.cpuDataWidth bits))
object MEMORY_LOAD_DATA extends Stageable(Bits(config.cpuDataWidth bits))
override def setup(pipeline: VexRiscv): Unit = {
import Riscv._
@ -292,12 +302,12 @@ class DBusCachedPlugin(val config : DataCacheConfig,
cache.io.cpu.execute.isValid := arbitration.isValid && input(MEMORY_ENABLE)
cache.io.cpu.execute.address := input(SRC_ADD).asUInt
cache.io.cpu.execute.args.wr := input(MEMORY_WR)
insert(MEMORY_STORE_DATA) := size.mux(
insert(MEMORY_STORE_DATA_RF) := size.mux(
U(0) -> input(RS2)( 7 downto 0) ## input(RS2)( 7 downto 0) ## input(RS2)(7 downto 0) ## input(RS2)(7 downto 0),
U(1) -> input(RS2)(15 downto 0) ## input(RS2)(15 downto 0),
default -> input(RS2)(31 downto 0)
)
cache.io.cpu.execute.args.size := size
cache.io.cpu.execute.args.size := size.resized
if(twoStageMmu) {
mmuBus.cmd(0).isValid := cache.io.cpu.execute.isValid
@ -358,13 +368,16 @@ class DBusCachedPlugin(val config : DataCacheConfig,
}
val managementStage = stages.last
managementStage plug new Area{
val mgs = managementStage plug new Area{
import managementStage._
cache.io.cpu.writeBack.isValid := arbitration.isValid && input(MEMORY_ENABLE)
cache.io.cpu.writeBack.isStuck := arbitration.isStuck
cache.io.cpu.writeBack.isUser := (if(privilegeService != null) privilegeService.isUser() else False)
cache.io.cpu.writeBack.address := U(input(REGFILE_WRITE_DATA))
cache.io.cpu.writeBack.storeData := input(MEMORY_STORE_DATA)
cache.io.cpu.writeBack.storeData.subdivideIn(32 bits).foreach(_ := input(MEMORY_STORE_DATA_RF))
for((cond, value) <- bypassStoreList) when(cond){
cache.io.cpu.writeBack.storeData := value
}
val fence = if(withInvalidate) new Area {
cache.io.cpu.writeBack.fence := input(INSTRUCTION)(31 downto 20).as(FenceFlags())
@ -425,12 +438,15 @@ class DBusCachedPlugin(val config : DataCacheConfig,
arbitration.haltItself.setWhen(cache.io.cpu.writeBack.isValid && cache.io.cpu.writeBack.haltIt)
val rspShifted = Bits(32 bits)
rspShifted := cache.io.cpu.writeBack.data
val rspRf = cache.io.cpu.writeBack.data.subdivideIn(32 bits).read(cache.io.cpu.writeBack.address(cache.cpuWordToRfWordRange))
val rspShifted = CombInit(rspRf)
switch(input(MEMORY_ADDRESS_LOW)){
is(1){rspShifted(7 downto 0) := cache.io.cpu.writeBack.data(15 downto 8)}
is(2){rspShifted(15 downto 0) := cache.io.cpu.writeBack.data(31 downto 16)}
is(3){rspShifted(7 downto 0) := cache.io.cpu.writeBack.data(31 downto 24)}
is(1){rspShifted(7 downto 0) := rspRf(15 downto 8)}
is(2){rspShifted(15 downto 0) := rspRf(31 downto 16)}
is(3){rspShifted(7 downto 0) := rspRf(31 downto 24)}
}
if(withLrSc) when(input(MEMORY_LRSC) && input(MEMORY_WR)){
rspShifted := B(!cache.io.cpu.writeBack.exclusiveOk).resized
}
val rspFormated = input(INSTRUCTION)(13 downto 12).mux(
@ -443,7 +459,7 @@ class DBusCachedPlugin(val config : DataCacheConfig,
output(REGFILE_WRITE_DATA) := rspFormated
}
insert(DBUS_DATA) := cache.io.cpu.writeBack.data
insert(MEMORY_LOAD_DATA) := cache.io.cpu.writeBack.data
}
//Share access to the dBus (used by self refilled MMU)
@ -458,11 +474,11 @@ class DBusCachedPlugin(val config : DataCacheConfig,
cache.io.cpu.execute.isValid := True
dBusAccess.cmd.ready := !execute.arbitration.isStuck
}
cache.io.cpu.execute.args.wr := dBusAccess.cmd.write
execute.insert(MEMORY_STORE_DATA) := dBusAccess.cmd.data
cache.io.cpu.execute.args.size := dBusAccess.cmd.size
if(withLrSc) cache.io.cpu.execute.args.isLrsc := False
if(withAmo) cache.io.cpu.execute.args.isAmo := False
cache.io.cpu.execute.args.wr := False //dBusAccess.cmd.write
// execute.insert(MEMORY_STORE_DATA_RF) := dBusAccess.cmd.data //Not implemented
cache.io.cpu.execute.args.size := dBusAccess.cmd.size.resized
if(withLrSc) execute.input(MEMORY_LRSC) := False
if(withAmo) execute.input(MEMORY_AMO) := False
cache.io.cpu.execute.address := dBusAccess.cmd.address //Will only be 12 muxes
forceDatapath := True
}
@ -474,7 +490,7 @@ class DBusCachedPlugin(val config : DataCacheConfig,
if(mmuAndBufferStage != execute) (cache.io.cpu.memory.isValid setWhen(mmuAndBufferStage.input(IS_DBUS_SHARING)))
cache.io.cpu.writeBack.isValid setWhen(managementStage.input(IS_DBUS_SHARING))
dBusAccess.rsp.valid := managementStage.input(IS_DBUS_SHARING) && !cache.io.cpu.writeBack.isWrite && (cache.io.cpu.redo || !cache.io.cpu.writeBack.haltIt)
dBusAccess.rsp.data := cache.io.cpu.writeBack.data
dBusAccess.rsp.data := mgs.rspRf
dBusAccess.rsp.error := cache.io.cpu.writeBack.unalignedAccess || cache.io.cpu.writeBack.accessError
dBusAccess.rsp.redo := cache.io.cpu.redo
component.addPrePopTask{() =>

View File

@ -146,6 +146,10 @@ class FpuPlugin(externalFpu : Boolean = false,
val dBusEncoding = pipeline.service(classOf[DBusEncodingService])
dBusEncoding.addLoadWordEncoding(FLW)
dBusEncoding.addStoreWordEncoding(FSW)
if(p.withDouble) {
dBusEncoding.addLoadWordEncoding(FLD)
dBusEncoding.addStoreWordEncoding(FSD)
}
}
override def build(pipeline: VexRiscv): Unit = {
@ -235,7 +239,7 @@ class FpuPlugin(externalFpu : Boolean = false,
when(isRsp){
when(arbitration.isValid) {
dBusEncoding.bypassStore(port.rsp.value)
output(REGFILE_WRITE_DATA) := port.rsp.value
output(REGFILE_WRITE_DATA) := port.rsp.value(31 downto 0)
}
when(!port.rsp.valid){
arbitration.haltByOther := True
@ -247,7 +251,8 @@ class FpuPlugin(externalFpu : Boolean = false,
// Manage $load
val commit = Stream(FpuCommit(p))
commit.valid := isCommit && !arbitration.isStuck
commit.value := (input(FPU_COMMIT_LOAD) ? output(DBUS_DATA) | input(RS1))
commit.value(31 downto 0) := (input(FPU_COMMIT_LOAD) ? dBusEncoding.loadData()(31 downto 0) | input(RS1))
if(p.withDouble) commit.value(63 downto 32) := dBusEncoding.loadData()(63 downto 32)
commit.write := arbitration.isValid && !arbitration.removeIt
commit.sync := input(FPU_COMMIT_SYNC)

View File

@ -4,7 +4,7 @@
#include "VVexRiscv_RiscvCore.h"
#endif
#include "verilated.h"
#include "verilated_vcd_c.h"
#include "verilated_fst_c.h"
#include <stdio.h>
#include <iostream>
#include <stdlib.h>
@ -175,8 +175,12 @@ void loadBinImpl(string path,Memory* mem, uint32_t offset) {
#define TEXTIFY(A) #A
// Intentionally empty hook: the assertEq macro calls this right before it throws.
// NOTE(review): presumably exists only as a convenient debugger breakpoint target — confirm.
void breakMe(){
int a = 0;
}
// Checks that x equals ref. On mismatch it prints the stringified expression together
// with the actual and expected values, calls breakMe() and throws std::exception.
// Both arguments are parenthesized so that expressions built from operators with lower
// precedence than != (e.g. `a & b`) are compared as a whole instead of being re-grouped.
// Caveats: x and ref are evaluated a second time when the check fails, and both are
// printed with %d.
#define assertEq(x,ref) if((x) != (ref)) {\
printf("\n*** %s is %d but should be %d ***\n\n",TEXTIFY(x),(x),(ref));\
breakMe();\
throw std::exception();\
}
@ -1106,7 +1110,7 @@ public:
uint32_t bootPc = -1;
uint32_t iStall = STALL,dStall = STALL;
#ifdef TRACE
VerilatedVcdC* tfp;
VerilatedFstC* tfp;
#endif
bool allowInvalidate = true;
@ -1129,13 +1133,13 @@ public:
class MemWrite {
public:
int32_t address, size;
uint32_t data;
uint8_t data42[64];
};
class MemRead {
public:
int32_t address, size;
uint32_t data;
uint8_t data42[64];
bool error;
};
@ -1186,7 +1190,10 @@ public:
cout << " DUT : address=" << t.address << " size=" << t.size << endl;
fail();
}
*data = t.data;
for(int i = 0; i < size; i++){
((uint8_t*)data)[i] = t.data42[i];
}
periphRead.pop();
return t.error;
}else {
@ -1205,10 +1212,8 @@ public:
MemWrite w;
w.address = address;
w.size = size;
switch(size){
case 1: w.data = data & 0xFF; break;
case 2: w.data = data & 0xFFFF; break;
case 4: w.data = data; break;
for(int i = 0; i < size; i++){
w.data42[i] = ((uint8_t*)&data)[i];
}
periphWritesGolden.push(w);
if(periphWritesGolden.size() > 10){
@ -1231,10 +1236,12 @@ public:
case 0:
MemWrite t = periphWrites.front();
MemWrite t2 = periphWritesGolden.front();
if(t.address != t2.address || t.size != t2.size || t.data != t2.data){
bool dataMatch = true;
for(int i = 0;i < min(t.size, t2.size);i++) dataMatch &= t.data42[i] == t2.data42[i];
if(t.address != t2.address || t.size != t2.size || !dataMatch){
cout << hex << "periphWrite missmatch" << endl;
cout << " DUT address=" << t.address << " size=" << t.size << " data=" << t.data << endl;
cout << " REF address=" << t2.address << " size=" << t2.size << " data=" << t2.data << endl;
cout << " DUT address=" << t.address << " size=" << t.size << " data=" << *((uint32_t*)t.data42) << endl;
cout << " REF address=" << t2.address << " size=" << t2.size << " data=" << *((uint32_t*)t2.data42) << endl;
fail();
}
periphWrites.pop();
@ -1345,43 +1352,19 @@ public:
virtual bool isDBusCheckedRegion(uint32_t address){ return isPerifRegion(address);}
virtual void dBusAccess(uint32_t addr,bool wr, uint32_t size,uint32_t mask, uint32_t *data, bool *error) {
assertEq(addr % (1 << size), 0);
virtual void dBusAccess(uint32_t addr,bool wr, uint32_t size, uint8_t *data, bool *error) {
assertEq(addr % size, 0);
if(!isPerifRegion(addr)) {
if(wr){
memTraces <<
#ifdef TRACE_WITH_TIME
(currentTime
#ifdef REF
-2
#endif
) <<
#endif
" : WRITE mem" << hex << (1 << size) << "[" << addr << "] = " << *data << dec << endl;
for(uint32_t b = 0;b < (1 << size);b++){
uint32_t offset = (addr+b)&0x3;
if((mask >> offset) & 1 == 1)
*mem.get(addr + b) = *data >> (offset*8);
for(uint32_t b = 0;b < size;b++){
*mem.get(addr + b) = ((uint8_t*)data)[b];
}
}else{
*data = VL_RANDOM_I(32);
for(uint32_t b = 0;b < (1 << size);b++){
uint32_t offset = (addr+b)&0x3;
*data &= ~(0xFF << (offset*8));
*data |= mem[addr + b] << (offset*8);
uint32_t innerOffset = addr & (DBUS_LOAD_DATA_WIDTH/8-1);
for(uint32_t b = 0;b < size;b++){
((uint8_t*)data)[b] = mem[addr + b];
}
/*
memTraces <<
#ifdef TRACE_WITH_TIME
(currentTime
#ifdef REF
-2
#endif
) <<
#endif
" : READ mem" << (1 << size) << "[" << addr << "] = " << *data << endl;*/
}
}
@ -1390,21 +1373,9 @@ public:
if(isDBusCheckedRegion(addr)){
CpuRef::MemWrite w;
w.address = addr;
while((mask & 1) == 0){
mask >>= 1;
w.address++;
w.data >>= 8;
}
switch(mask){
case 1: size = 0; break;
case 3: size = min(1u, size); break;
case 15: size = min(2u, size); break;
}
w.size = 1 << size;
switch(size){
case 0: w.data = *data & 0xFF; break;
case 1: w.data = *data & 0xFFFF; break;
case 2: w.data = *data ; break;
w.size = size;
for(uint32_t b = 0;b < size;b++){
w.data42[b] = data[b];
}
riscvRef.periphWrites.push(w);
}
@ -1412,8 +1383,10 @@ public:
if(isPerifRegion(addr)){
CpuRef::MemRead r;
r.address = addr;
r.size = 1 << size;
r.data = *data;
r.size = size;
for(uint32_t b = 0;b < size;b++){
r.data42[b] = data[b];
}
r.error = *error;
riscvRef.periphRead.push(r);
}
@ -1461,9 +1434,9 @@ public:
// init trace dump
#ifdef TRACE
Verilated::traceEverOn(true);
tfp = new VerilatedVcdC;
tfp = new VerilatedFstC;
top->trace(tfp, 99);
tfp->open((vcdName + ".vcd").c_str());
tfp->open((vcdName + ".fst").c_str());
#endif
// Reset
@ -1725,7 +1698,8 @@ public:
virtual void dutPutChar(char c){}
virtual void dBusAccess(uint32_t addr,bool wr, uint32_t size,uint32_t mask, uint32_t *data, bool *error) {
virtual void dBusAccess(uint32_t addr,bool wr, uint32_t size, uint8_t *dataBytes, bool *error) {
uint32_t *data = ((uint32_t*)dataBytes);
if(wr){
switch(addr){
case 0xF0010000u: {
@ -1788,19 +1762,10 @@ public:
case 0xF00FFF4Cu: *data = mTimeCmp >> 32; break;
case 0xF0010004u: *data = ~0; break;
}
memTraces <<
#ifdef TRACE_WITH_TIME
(currentTime
#ifdef REF
-2
#endif
) <<
#endif
" : READ mem" << (1 << size) << "[" << addr << "] = " << *data << endl;
}
*error = addr == 0xF00FFF60u;
Workspace::dBusAccess(addr,wr,size,mask,data,error);
Workspace::dBusAccess(addr,wr,size,dataBytes,error);
}
@ -2195,7 +2160,7 @@ public:
if (top->dBus_cmd_valid && top->dBus_cmd_ready) {
pending = true;
data_next = top->dBus_cmd_payload_data;
ws->dBusAccess(top->dBus_cmd_payload_address,top->dBus_cmd_payload_wr,top->dBus_cmd_payload_size,0xF,&data_next,&error_next);
ws->dBusAccess(top->dBus_cmd_payload_address,top->dBus_cmd_payload_wr,1 << top->dBus_cmd_payload_size,((uint8_t*)&data_next) + (top->dBus_cmd_payload_address & 3),&error_next);
}
}
@ -2370,7 +2335,7 @@ public:
#include <queue>
struct DBusCachedTask{
char data[DBUS_DATA_WIDTH/8];
char data[DBUS_LOAD_DATA_WIDTH/8];
bool error;
bool last;
bool exclusive;
@ -2407,12 +2372,14 @@ public:
virtual void preCycle(){
if (top->dBus_cmd_valid && top->dBus_cmd_ready) {
if(top->dBus_cmd_payload_wr){
int size = 1 << top->dBus_cmd_payload_size;
#ifdef DBUS_INVALIDATE
pendingSync += 1;
#endif
#ifndef DBUS_EXCLUSIVE
bool error;
ws->dBusAccess(top->dBus_cmd_payload_address,1,2,top->dBus_cmd_payload_mask,&top->dBus_cmd_payload_data,&error);
int shift = top->dBus_cmd_payload_address & (DBUS_STORE_DATA_WIDTH/8-1);
ws->dBusAccess(top->dBus_cmd_payload_address,1,size,((uint8_t*)&top->dBus_cmd_payload_data) + shift,&error);
#else
bool cancel = false, error = false;
if(top->dBus_cmd_payload_exclusive){
@ -2424,31 +2391,28 @@ public:
if(!cancel) {
for(int idx = 0;idx < 1;idx++){
bool localError = false;
ws->dBusAccess(top->dBus_cmd_payload_address+idx*4,1,2,top->dBus_cmd_payload_mask >> idx*4,((uint32_t*)&top->dBus_cmd_payload_data)+idx, &localError);
int shift = top->dBus_cmd_payload_address & (DBUS_STORE_DATA_WIDTH/8-1);
ws->dBusAccess(top->dBus_cmd_payload_address,1,size,((uint8_t*)&top->dBus_cmd_payload_data) + shift,&localError);
error |= localError;
//printf("%d ", (int)localError);
}
}
// printf("%x %d\n", top->dBus_cmd_payload_address, (int)error);
rsp.last = true;
rsp.error = error;
rsps.push(rsp);
#endif
} else {
bool error = false;
uint32_t beatCount = top->dBus_cmd_payload_length*32/DBUS_DATA_WIDTH;
uint32_t beatCount = (((1 << top->dBus_cmd_payload_size)*8+DBUS_LOAD_DATA_WIDTH-1) / DBUS_LOAD_DATA_WIDTH)-1;
uint32_t startAt = top->dBus_cmd_payload_address;
uint32_t endAt = top->dBus_cmd_payload_address + (1 << top->dBus_cmd_payload_size);
uint32_t address = top->dBus_cmd_payload_address & ~(DBUS_LOAD_DATA_WIDTH/8-1);
uint8_t buffer[64];
ws->dBusAccess(top->dBus_cmd_payload_address,0,1 << top->dBus_cmd_payload_size,buffer, &error);
for(int beat = 0;beat <= beatCount;beat++){
if(top->dBus_cmd_payload_length == 0){
uint32_t sel = (top->dBus_cmd_payload_address >> 2) & (DBUS_DATA_WIDTH/32-1);
ws->dBusAccess(top->dBus_cmd_payload_address,0,2,0,((uint32_t*)rsp.data) + sel,&error);
} else {
for(int idx = 0;idx < DBUS_DATA_WIDTH/32;idx++){
bool localError = false;
ws->dBusAccess(top->dBus_cmd_payload_address + beat * DBUS_DATA_WIDTH/8 + idx*4,0,2,0,((uint32_t*)rsp.data)+idx, &localError);
error |= localError;
}
for(int i = 0;i < DBUS_LOAD_DATA_WIDTH/8;i++){
rsp.data[i] = (address >= startAt && address < endAt) ? buffer[address-top->dBus_cmd_payload_address] : VL_RANDOM_I(8);
address += 1;
}
rsp.last = beat == beatCount;
#ifdef DBUS_EXCLUSIVE
@ -2485,7 +2449,7 @@ public:
rsps.pop();
top->dBus_rsp_valid = 1;
top->dBus_rsp_payload_error = rsp.error;
for(int idx = 0;idx < DBUS_DATA_WIDTH/32;idx++){
for(int idx = 0;idx < DBUS_LOAD_DATA_WIDTH/32;idx++){
((uint32_t*)&top->dBus_rsp_payload_data)[idx] = ((uint32_t*)rsp.data)[idx];
}
top->dBus_rsp_payload_last = rsp.last;
@ -2494,7 +2458,7 @@ public:
#endif
} else{
top->dBus_rsp_valid = 0;
for(int idx = 0;idx < DBUS_DATA_WIDTH/32;idx++){
for(int idx = 0;idx < DBUS_LOAD_DATA_WIDTH/32;idx++){
((uint32_t*)&top->dBus_rsp_payload_data)[idx] = VL_RANDOM_I(32);
}
top->dBus_rsp_payload_error = VL_RANDOM_I(1);
@ -3092,12 +3056,13 @@ public:
}
virtual void dBusAccess(uint32_t addr,bool wr, uint32_t size,uint32_t mask, uint32_t *data, bool *error) {
virtual void dBusAccess(uint32_t addr,bool wr, uint32_t size, uint8_t *dataBytes, bool *error) {
if(wr && addr == 0xF00FFF2C){
uint32_t *data = (uint32_t*)dataBytes;
out32 << hex << setw(8) << std::setfill('0') << *data << dec;
if(++out32Counter % 4 == 0) out32 << "\n";
}
WorkspaceRegression::dBusAccess(addr,wr,size,mask,data,error);
WorkspaceRegression::dBusAccess(addr,wr,size,dataBytes,error);
}
virtual void checks(){
@ -3437,9 +3402,11 @@ public:
virtual void dBusAccess(uint32_t addr,bool wr, uint32_t size,uint32_t mask, uint32_t *data, bool *error) {
if(isPerifRegion(addr)) switch(addr){
//TODO Emulate peripherals here
virtual void dBusAccess(uint32_t addr,bool wr, uint32_t size,uint8_t *dataBytes, bool *error) {
uint32_t *data = (uint32_t*)dataBytes;
if(isPerifRegion(addr)) {
switch(addr){
case 0xFFFFFFE0: if(wr) fail(); else *data = mTime; break;
case 0xFFFFFFE4: if(wr) fail(); else *data = mTime >> 32; break;
case 0xFFFFFFE8: if(wr) mTimeCmp = (mTimeCmp & 0xFFFFFFFF00000000) | *data; else *data = mTimeCmp; break;
@ -3468,10 +3435,10 @@ public:
}
break;
case 0xFFFFFFFC: fail(); break; //Simulation end
default: cout << "Unmapped peripheral access : addr=0x" << hex << addr << " wr=" << wr << " mask=0x" << mask << " data=0x" << data << dec << endl; fail(); break;
default: cout << "Unmapped peripheral access : addr=0x" << hex << addr << " wr=" << wr << " mask=0x" << " data=0x" << data << dec << endl; fail(); break;
}
Workspace::dBusAccess(addr,wr,size,mask,data,error);
}
Workspace::dBusAccess(addr,wr,size,dataBytes,error);
}
virtual void onStdout(char c){
@ -3541,9 +3508,9 @@ public:
virtual void dBusAccess(uint32_t addr,bool wr, uint32_t size,uint32_t mask, uint32_t *data, bool *error) {
virtual void dBusAccess(uint32_t addr,bool wr, uint32_t size,uint64_t mask, uint8_t *dataBytes, bool *error) {
uint32_t *data = (uint32_t*)dataBytes;
if(isPerifRegion(addr)) switch(addr){
//TODO Emulate peripherals here
case 0xF0010000: if(wr && *data != 0) fail(); else *data = 0; break;
case 0xF001BFF8: if(wr) fail(); else *data = mTime; break;
case 0xF001BFFC: if(wr) fail(); else *data = mTime >> 32; break;
@ -3576,7 +3543,6 @@ public:
break;
default: cout << "Unmapped peripheral access : addr=0x" << hex << addr << " wr=" << wr << " mask=0x" << mask << " data=0x" << data << dec << endl; fail(); break;
}
Workspace::dBusAccess(addr,wr,size,mask,data,error);
}
@ -3891,7 +3857,7 @@ int main(int argc, char **argv, char **env) {
redo(REDO,RiscvTest(name).bootAt(0x80000188u)->writeWord(0x80000184u, 0x00305073)->run();)
}
#endif
return 0;
//return 0;
//#ifdef LITEX
// LitexSoC("linux")
@ -4064,11 +4030,6 @@ int main(int argc, char **argv, char **env) {
redo(REDO,RiscvTest(name).run();)
}
#ifdef RVF
for(const string &name : riscvTestFloat){
redo(REDO,RiscvTest(name).run();)
}
#endif
#ifdef MUL
for(const string &name : riscvTestMul){

View File

@ -5,7 +5,8 @@ IBUS?=CACHED
IBUS_TC?=no
IBUS_DATA_WIDTH?=32
DBUS?=CACHED
DBUS_DATA_WIDTH?=32
DBUS_LOAD_DATA_WIDTH?=32
DBUS_STORE_DATA_WIDTH?=32
TRACE?=no
TRACE_ACCESS?=no
TRACE_START=0
@ -50,7 +51,8 @@ WITH_USER_IO?=no
ADDCFLAGS += -CFLAGS -DREGRESSION_PATH='\"$(REGRESSION_PATH)/\"'
ADDCFLAGS += -CFLAGS -DIBUS_${IBUS}
ADDCFLAGS += -CFLAGS -DIBUS_DATA_WIDTH=${IBUS_DATA_WIDTH}
ADDCFLAGS += -CFLAGS -DDBUS_DATA_WIDTH=${DBUS_DATA_WIDTH}
ADDCFLAGS += -CFLAGS -DDBUS_LOAD_DATA_WIDTH=${DBUS_LOAD_DATA_WIDTH}
ADDCFLAGS += -CFLAGS -DDBUS_STORE_DATA_WIDTH=${DBUS_STORE_DATA_WIDTH}
ADDCFLAGS += -CFLAGS -DDBUS_${DBUS}
ADDCFLAGS += -CFLAGS -DREDO=${REDO}
@ -197,9 +199,8 @@ ifneq ($(SEED),no)
ADDCFLAGS += -CFLAGS -DSEED=${SEED}
endif
ifeq ($(TRACE),yes)
VERILATOR_ARGS += --trace
VERILATOR_ARGS += --trace-fst
ADDCFLAGS += -CFLAGS -DTRACE
endif

View File

@ -50,100 +50,100 @@ class DhrystoneBench extends FunSuite {
}
// for(withMemoryStage <- List(false, true)){
// val stages = if(withMemoryStage) "Three" else "Two"
// getDmips(
// name = s"Gen${stages}StageArty",
// gen = SpinalVerilog(GenTwoThreeStage.cpu(
// withMulDiv = false,
// bypass = false,
// barrielShifter = false,
// withMemoryStage = withMemoryStage
// )),
// testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes"
// )
// getDmips(
// name = s"Gen${stages}StageBarrielArty",
// gen = SpinalVerilog(GenTwoThreeStage.cpu(
// withMulDiv = false,
// bypass = true,
// barrielShifter = true,
// withMemoryStage = withMemoryStage
// )),
// testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes"
// )
// getDmips(
// name = s"Gen${stages}StageMDArty",
// gen = SpinalVerilog(GenTwoThreeStage.cpu(
// withMulDiv = true,
// bypass = false,
// barrielShifter = false,
// withMemoryStage = withMemoryStage
// )),
// testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=yes DIV=yes COREMARK=yes"
// )
// getDmips(
// name = s"Gen${stages}StageMDBarrielArty",
// gen = SpinalVerilog(GenTwoThreeStage.cpu(
// withMulDiv = true,
// bypass = true,
// barrielShifter = true,
// withMemoryStage = withMemoryStage
// )),
// testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=yes DIV=yes COREMARK=yes"
// )
// }
//
// getDmips(
// name = "GenSmallestNoCsr",
// gen = GenSmallestNoCsr.main(null),
// testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes"
// )
//
//
// getDmips(
// name = "GenSmallest",
// gen = GenSmallest.main(null),
// testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes"
// )
//
//
// getDmips(
// name = "GenSmallAndProductive",
// gen = GenSmallAndProductive.main(null),
// testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes"
// )
//
// getDmips(
// name = "GenSmallAndProductiveWithICache",
// gen = GenSmallAndProductiveICache.main(null),
// testCmd = "make clean run REDO=10 IBUS=CACHED DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes"
// )
//
//
// getDmips(
// name = "GenFullNoMmuNoCache",
// gen = GenFullNoMmuNoCache.main(null),
// testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no COREMARK=yes"
// )
//
// getDmips(
// name = "GenNoCacheNoMmuMaxPerf",
// gen = GenNoCacheNoMmuMaxPerf.main(null),
// testCmd = "make clean run REDO=10 MMU=no CSR=no DBUS=SIMPLE IBUS=SIMPLE COREMARK=yes"
// )
//
//
// getDmips(
// name = "GenFullNoMmuMaxPerf",
// gen = GenFullNoMmuMaxPerf.main(null),
// testCmd = "make clean run REDO=10 MMU=no CSR=no COREMARK=yes"
// )
// getDmips(
// name = "GenFullNoMmu",
// gen = GenFullNoMmu.main(null),
// testCmd = "make clean run REDO=10 MMU=no CSR=no COREMARK=yes"
// )
for(withMemoryStage <- List(false, true)){
val stages = if(withMemoryStage) "Three" else "Two"
getDmips(
name = s"Gen${stages}StageArty",
gen = SpinalVerilog(GenTwoThreeStage.cpu(
withMulDiv = false,
bypass = false,
barrielShifter = false,
withMemoryStage = withMemoryStage
)),
testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes"
)
getDmips(
name = s"Gen${stages}StageBarrielArty",
gen = SpinalVerilog(GenTwoThreeStage.cpu(
withMulDiv = false,
bypass = true,
barrielShifter = true,
withMemoryStage = withMemoryStage
)),
testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes"
)
getDmips(
name = s"Gen${stages}StageMDArty",
gen = SpinalVerilog(GenTwoThreeStage.cpu(
withMulDiv = true,
bypass = false,
barrielShifter = false,
withMemoryStage = withMemoryStage
)),
testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=yes DIV=yes COREMARK=yes"
)
getDmips(
name = s"Gen${stages}StageMDBarrielArty",
gen = SpinalVerilog(GenTwoThreeStage.cpu(
withMulDiv = true,
bypass = true,
barrielShifter = true,
withMemoryStage = withMemoryStage
)),
testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=yes DIV=yes COREMARK=yes"
)
}
getDmips(
name = "GenSmallestNoCsr",
gen = GenSmallestNoCsr.main(null),
testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes"
)
getDmips(
name = "GenSmallest",
gen = GenSmallest.main(null),
testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes"
)
getDmips(
name = "GenSmallAndProductive",
gen = GenSmallAndProductive.main(null),
testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes"
)
getDmips(
name = "GenSmallAndProductiveWithICache",
gen = GenSmallAndProductiveICache.main(null),
testCmd = "make clean run REDO=10 IBUS=CACHED DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes"
)
getDmips(
name = "GenFullNoMmuNoCache",
gen = GenFullNoMmuNoCache.main(null),
testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no COREMARK=yes"
)
getDmips(
name = "GenNoCacheNoMmuMaxPerf",
gen = GenNoCacheNoMmuMaxPerf.main(null),
testCmd = "make clean run REDO=10 MMU=no CSR=no DBUS=SIMPLE IBUS=SIMPLE COREMARK=yes"
)
getDmips(
name = "GenFullNoMmuMaxPerf",
gen = GenFullNoMmuMaxPerf.main(null),
testCmd = "make clean run REDO=10 MMU=no CSR=no COREMARK=yes"
)
getDmips(
name = "GenFullNoMmu",
gen = GenFullNoMmu.main(null),
testCmd = "make clean run REDO=10 MMU=no CSR=no COREMARK=yes"
)
getDmips(
name = "GenFull",
@ -151,11 +151,11 @@ class DhrystoneBench extends FunSuite {
testCmd = "make clean run REDO=10 CSR=no MMU=no COREMARK=yes"
)
// getDmips(
// name = "GenLinuxBalenced",
// gen = LinuxGen.main(Array.fill[String](0)("")),
// testCmd = "make clean run IBUS=CACHED DBUS=CACHED DEBUG_PLUGIN=STD DHRYSTONE=yes SUPERVISOR=yes MMU=no CSR=yes CSR_SKIP_TEST=yes COMPRESSED=no MUL=yes DIV=yes LRSC=yes AMO=yes REDO=10 TRACE=no COREMARK=yes LINUX_REGRESSION=no"
// )
getDmips(
name = "GenLinuxBalenced",
gen = LinuxGen.main(Array.fill[String](0)("")),
testCmd = "make clean run IBUS=CACHED DBUS=CACHED DEBUG_PLUGIN=STD DHRYSTONE=yes SUPERVISOR=yes MMU=no CSR=yes CSR_SKIP_TEST=yes COMPRESSED=no MUL=yes DIV=yes LRSC=yes AMO=yes REDO=10 TRACE=no COREMARK=yes LINUX_REGRESSION=no"
)
test("final_report") {

View File

@ -438,6 +438,8 @@ class DBusDimension extends VexRiscvDimension("DBus") {
val twoStageMmu = r.nextBoolean() && !noMemory && !noWriteBack
val mmuConfig = if(universes.contains(VexRiscvUniverse.MMU)) MmuPortConfig(portTlbSize = 4, latency = if(twoStageMmu) 1 else 0, earlyRequireMmuLockup = Random.nextBoolean() && twoStageMmu, earlyCacheHits = Random.nextBoolean() && twoStageMmu) else null
val memDataWidth = List(32,64,128)(r.nextInt(3))
val cpuDataWidthChoices = List(32,64,128).filter(_ <= memDataWidth)
val cpuDataWidth = cpuDataWidthChoices(r.nextInt(cpuDataWidthChoices.size))
val bytePerLine = Math.max(memDataWidth/8, List(8,16,32,64)(r.nextInt(4)))
var cacheSize = 0
var wayCount = 0
@ -455,8 +457,8 @@ class DBusDimension extends VexRiscvDimension("DBus") {
cacheSize = 512 << r.nextInt(5)
wayCount = 1 << r.nextInt(3)
}while(cacheSize/wayCount < 512 || (catchAll && cacheSize/wayCount > 4096))
new VexRiscvPosition(s"Cached${memDataWidth}d" + "S" + cacheSize + "W" + wayCount + "BPL" + bytePerLine + (if(dBusCmdMasterPipe) "Cmp " else "") + (if(dBusCmdSlavePipe) "Csp " else "") + (if(dBusRspSlavePipe) "Rsp " else "") + (if(relaxedMemoryTranslationRegister) "Rmtr " else "") + (if(earlyWaysHits) "Ewh " else "") + (if(withAmo) "Amo " else "") + (if(withSmp) "Smp " else "") + (if(directTlbHit) "Dtlb " else "") + (if(twoStageMmu) "Tsmmu " else "") + (if(asyncTagMemory) "Atm" else "")) {
override def testParam = s"DBUS=CACHED DBUS_DATA_WIDTH=$memDataWidth " + (if(withLrSc) "LRSC=yes " else "") + (if(withAmo) "AMO=yes " else "") + (if(withSmp) "DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes " else "")
new VexRiscvPosition(s"Cached${memDataWidth}d${cpuDataWidth}c" + "S" + cacheSize + "W" + wayCount + "BPL" + bytePerLine + (if(dBusCmdMasterPipe) "Cmp " else "") + (if(dBusCmdSlavePipe) "Csp " else "") + (if(dBusRspSlavePipe) "Rsp " else "") + (if(relaxedMemoryTranslationRegister) "Rmtr " else "") + (if(earlyWaysHits) "Ewh " else "") + (if(withAmo) "Amo " else "") + (if(withSmp) "Smp " else "") + (if(directTlbHit) "Dtlb " else "") + (if(twoStageMmu) "Tsmmu " else "") + (if(asyncTagMemory) "Atm" else "")) {
override def testParam = s"DBUS=CACHED DBUS_LOAD_DATA_WIDTH=$memDataWidth DBUS_STORE_DATA_WIDTH=$cpuDataWidth " + (if(withLrSc) "LRSC=yes " else "") + (if(withAmo) "AMO=yes " else "") + (if(withSmp) "DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes " else "")
override def applyOn(config: VexRiscvConfig): Unit = {
config.plugins += new DBusCachedPlugin(
@ -465,7 +467,8 @@ class DBusDimension extends VexRiscvDimension("DBus") {
bytePerLine = bytePerLine,
wayCount = wayCount,
addressWidth = 32,
cpuDataWidth = 32,
rfDataWidth = 32,
cpuDataWidth = cpuDataWidth, //Not tested
memDataWidth = memDataWidth,
catchAccessError = catchAll,
catchIllegal = catchAll,

View File

@ -19,7 +19,7 @@ import scala.collection.mutable.ArrayBuffer
import scala.sys.process.ProcessLogger
import scala.util.Random
//TODO Warning DataCache write aggregation will disable itself
class FpuTest extends FunSuite{
val b2f = lang.Float.intBitsToFloat(_)
@ -55,7 +55,7 @@ class FpuTest extends FunSuite{
}
def testP(p : FpuParameter){
val portCount = 4
val portCount = 1
val config = SimConfig
config.allOptimisation
@ -978,32 +978,35 @@ class FpuTest extends FunSuite{
def testSgnjRaw(a : Float, b : Float): Unit ={
val ref = b2f((f2b(a) & ~0x80000000) | f2b(b) & 0x80000000)
var ref = b2f((f2b(a) & ~0x80000000) | f2b(b) & 0x80000000)
if(a.isNaN) ref = a
testBinaryOp(sgnj,a,b,ref,0, null,"sgnj")
}
def testSgnjnRaw(a : Float, b : Float): Unit ={
val ref = b2f((f2b(a) & ~0x80000000) | ((f2b(b) & 0x80000000) ^ 0x80000000))
var ref = b2f((f2b(a) & ~0x80000000) | ((f2b(b) & 0x80000000) ^ 0x80000000))
if(a.isNaN) ref = a
testBinaryOp(sgnjn,a,b,ref,0, null,"sgnjn")
}
def testSgnjxRaw(a : Float, b : Float): Unit ={
val ref = b2f(f2b(a) ^ (f2b(b) & 0x80000000))
var ref = b2f(f2b(a) ^ (f2b(b) & 0x80000000))
if(a.isNaN) ref = a
testBinaryOp(sgnjx,a,b,ref,0, null,"sgnjx")
}
val f64SignMask = 1l << 63
def testSgnjF64Raw(a : Double, b : Double): Unit ={
var ref = b2d((d2b(a).toLong & ~f64SignMask) | d2b(b).toLong & f64SignMask)
if(d2b(a).toLong >> 32 == -1) ref = a
if(a.isNaN) ref = a
testBinaryOpF64(sgnj,a,b,ref,0, null,"sgnj")
}
def testSgnjnF64Raw(a : Double, b : Double): Unit ={
var ref = b2d((d2b(a).toLong & ~f64SignMask) | ((d2b(b).toLong & f64SignMask) ^ f64SignMask))
if(d2b(a).toLong >> 32 == -1) ref = a
if(a.isNaN) ref = a
testBinaryOpF64(sgnjn,a,b,ref,0, null,"sgnjn")
}
def testSgnjxF64Raw(a : Double, b : Double): Unit ={
var ref = b2d(d2b(a).toLong ^ (d2b(b).toLong & f64SignMask))
if(d2b(a).toLong >> 32 == -1) ref = a
if(a.isNaN) ref = a
testBinaryOpF64(sgnjx,a,b,ref,0, null,"sgnjx")
}
@ -1277,6 +1280,17 @@ class FpuTest extends FunSuite{
//TODO test boxing
//TODO double <-> single precision conversions
if(p.withDouble) {
for(_ <- 0 until 10000) testSgnjF64()
println("f64 sgnj done")
for(_ <- 0 until 10000) testSgnjF32()
println("f32 sgnj done")
//380000000001ffef 5fffffffffff9ff 8000000000100000
// testBinaryOpF64(mul,-5.877471754282472E-39, 8.814425663400984E-280, -5.180654E-318 ,1, FpuRoundMode.RMM,"mul")
// 5.877471754282472E-39 8.814425663400984E-280 -5.180654E-318 RMM
for(_ <- 0 until 10000) testCvtF64F32() // 1 did not equal 3 Flag missmatch dut=1 ref=3 testCvtF64F32Raw 1.1754942807573643E-38 1.17549435E-38 RMM
println("FCVT_D_S done")
for(_ <- 0 until 10000) testCvtF32F64()
@ -1288,8 +1302,6 @@ class FpuTest extends FunSuite{
println("f64 f2ui done")
for(_ <- 0 until 10000) testSgnjF64()
println("f64 sgnj done")
@ -1481,7 +1493,7 @@ class FpuTest extends FunSuite{
// dut.clockDomain.waitSampling(1000)
// simSuccess()
for(i <- 0 until 10000) fxxTests.randomPick()()
for(i <- 0 until 100000) fxxTests.randomPick()()
waitUntil(cpu.rspQueue.isEmpty)
}