diff --git a/src/main/scala/vexriscv/TestsWorkspace.scala b/src/main/scala/vexriscv/TestsWorkspace.scala index 04ceda3..40f8b33 100644 --- a/src/main/scala/vexriscv/TestsWorkspace.scala +++ b/src/main/scala/vexriscv/TestsWorkspace.scala @@ -27,7 +27,7 @@ import spinal.lib.bus.avalon.AvalonMM import spinal.lib.eda.altera.{InterruptReceiverTag, ResetEmitterTag} -//make clean all SEED=42 MMU=no STOP_ON_ERROR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes SUPERVISOR=yes REDO=0 DHRYSTONE=no LRSC=yes AMO=yes LINUX_REGRESSION=yes TRACE=yes TRACE_START=9546629800l FLOW_INFO=ye +// make clean all SEED=42 MMU=no STOP_ON_ERROR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes SUPERVISOR=yes REDO=1 DHRYSTONE=yes LRSC=yes AMO=yes LINUX_REGRESSION=yes TRACE=yes TRACE_START=1000000000 FLOW_INFO=ye IBUS_DATA_WIDTH=128 DBUS_DATA_WIDTH=128 object TestsWorkspace { def main(args: Array[String]) { def configFull = { @@ -60,7 +60,7 @@ object TestsWorkspace { injectorStage = false, config = InstructionCacheConfig( cacheSize = 4096*1, - bytePerLine = 32, + bytePerLine = 64, wayCount = 1, addressWidth = 32, cpuDataWidth = 32, @@ -92,11 +92,11 @@ object TestsWorkspace { dBusRspSlavePipe = true, config = new DataCacheConfig( cacheSize = 4096*1, - bytePerLine = 32, + bytePerLine = 64, wayCount = 1, addressWidth = 32, cpuDataWidth = 32, - memDataWidth = 32, + memDataWidth = 128, catchAccessError = true, catchIllegal = true, catchUnaligned = true, diff --git a/src/main/scala/vexriscv/ip/DataCache.scala b/src/main/scala/vexriscv/ip/DataCache.scala index 2e33999..9de6f09 100644 --- a/src/main/scala/vexriscv/ip/DataCache.scala +++ b/src/main/scala/vexriscv/ip/DataCache.scala @@ -34,7 +34,7 @@ case class DataCacheConfig(cacheSize : Int, assert(isPow2(pendingMax)) def withWriteResponse = withExclusive def burstSize = bytePerLine*8/memDataWidth - val burstLength = bytePerLine/(memDataWidth/8) + val burstLength = bytePerLine/(cpuDataWidth/8) def catchSomething = catchUnaligned || catchIllegal || catchAccessError def withInternalAmo = withAmo && !withExclusive def withInternalLrSc = withLrSc && !withExclusive @@ -196,8 +196,8 @@ case class DataCacheMemCmd(p : DataCacheConfig) extends Bundle{ val wr = Bool val uncached = Bool val address = UInt(p.addressWidth bit) - val data = Bits(p.memDataWidth bits) - val mask = Bits(p.memDataWidth/8 bits) + val data = Bits(p.cpuDataWidth bits) + val mask = Bits(p.cpuDataWidth/8 bits) val length = UInt(log2Up(p.burstLength) bits) val exclusive = p.withExclusive generate Bool() val last = Bool @@ -424,7 +424,6 @@ object DataCacheExternalAmoStates extends SpinalEnum{ //If external amo, mem rsp should stay class DataCache(val p : DataCacheConfig) extends Component{ import p._ - assert(cpuDataWidth == memDataWidth) val io = new Bundle{ val cpu = slave(DataCacheCpuBus(p)) @@ -434,19 +433,24 @@ class DataCache(val p : DataCacheConfig) extends Component{ val haltCpu = False val lineWidth = bytePerLine*8 val lineCount = cacheSize/bytePerLine - val wordWidth = Math.max(memDataWidth,cpuDataWidth) + val wordWidth = cpuDataWidth val wordWidthLog2 = log2Up(wordWidth) val wordPerLine = lineWidth/wordWidth val bytePerWord = wordWidth/8 val wayLineCount = lineCount/wayCount val wayLineLog2 = log2Up(wayLineCount) val wayWordCount = wayLineCount * wordPerLine + val memWordPerLine = lineWidth/memDataWidth val memTransactionPerLine = p.bytePerLine / (p.memDataWidth/8) + val bytePerMemWord = memDataWidth/8 + val wayMemWordCount = wayLineCount * memWordPerLine val tagRange = addressWidth-1 downto log2Up(wayLineCount*bytePerLine) val lineRange = tagRange.low-1 downto log2Up(bytePerLine) - val wordRange = log2Up(bytePerLine)-1 downto log2Up(bytePerWord) + val cpuWordRange = log2Up(bytePerLine)-1 downto log2Up(bytePerWord) + val memWordRange = log2Up(bytePerLine)-1 downto log2Up(bytePerMemWord) val hitRange = tagRange.high downto lineRange.low + val memWordToCpuWordRange = log2Up(bytePerMemWord)-1 downto log2Up(bytePerWord) class LineInfo() extends Bundle{ @@ -464,23 +468,24 @@ class DataCache(val p : DataCacheConfig) extends Component{ val tagsWriteLastCmd = RegNext(tagsWriteCmd) - val dataReadCmd = Flow(UInt(log2Up(wayWordCount) bits)) + val dataReadCmd = Flow(UInt(log2Up(wayMemWordCount) bits)) val dataWriteCmd = Flow(new Bundle{ val way = Bits(wayCount bits) - val address = UInt(log2Up(wayWordCount) bits) - val data = Bits(wordWidth bits) - val mask = Bits(wordWidth/8 bits) + val address = UInt(log2Up(wayMemWordCount) bits) + val data = Bits(memDataWidth bits) + val mask = Bits(memDataWidth/8 bits) }) - val ways = for(i <- 0 until wayCount) yield new Area{ val tags = Mem(new LineInfo(), wayLineCount) - val data = Mem(Bits(wordWidth bit), wayWordCount) + val data = Mem(Bits(memDataWidth bit), wayMemWordCount) //Reads val tagsReadRsp = tags.readSync(tagsReadCmd.payload, tagsReadCmd.valid && !io.cpu.memory.isStuck) - val dataReadRsp = data.readSync(dataReadCmd.payload, dataReadCmd.valid && !io.cpu.memory.isStuck) + val dataReadRspMem = data.readSync(dataReadCmd.payload, dataReadCmd.valid && !io.cpu.memory.isStuck) + val dataReadRspSel = if(mergeExecuteMemory) io.cpu.writeBack.address else io.cpu.memory.address + val dataReadRsp = dataReadRspMem.subdivideIn(cpuDataWidth bits).read(dataReadRspSel(memWordToCpuWordRange)) val tagsInvReadRsp = withInvalidate generate tags.readSync(tagsInvReadCmd.payload, tagsInvReadCmd.valid) @@ -511,13 +516,15 @@ class DataCache(val p : DataCacheConfig) extends Component{ tagsReadCmd.valid := True dataReadCmd.valid := True tagsReadCmd.payload := io.cpu.execute.address(lineRange) - dataReadCmd.payload := io.cpu.execute.address(lineRange.high downto wordRange.low) + dataReadCmd.payload := io.cpu.execute.address(lineRange.high downto memWordRange.low) } def collisionProcess(readAddress : UInt, readMask : Bits): Bits ={ val ret = Bits(wayCount bits) + val readAddressAligned = (readAddress >> log2Up(memDataWidth/cpuDataWidth)) + val dataWriteMaskAligned = dataWriteCmd.mask.subdivideIn(memDataWidth/cpuDataWidth slices).read(readAddress(log2Up(memDataWidth/cpuDataWidth)-1 downto 0)) for(i <- 0 until wayCount){ - ret(i) := dataWriteCmd.valid && dataWriteCmd.way(i) && dataWriteCmd.address === readAddress && (readMask & dataWriteCmd.mask) =/= 0 + ret(i) := dataWriteCmd.valid && dataWriteCmd.way(i) && dataWriteCmd.address === readAddressAligned && (readMask & dataWriteMaskAligned) =/= 0 } ret } @@ -600,7 +607,7 @@ class DataCache(val p : DataCacheConfig) extends Component{ U(1) -> B"0011", default -> B"1111" ) |<< io.cpu.execute.address(1 downto 0) - val dataColisions = collisionProcess(io.cpu.execute.address(lineRange.high downto wordRange.low), mask) + val dataColisions = collisionProcess(io.cpu.execute.address(lineRange.high downto cpuWordRange.low), mask) val wayInvalidate = B(0, wayCount bits) //Used if invalidate enabled val isAmo = if(withAmo) io.cpu.execute.isAmo else False @@ -643,7 +650,7 @@ class DataCache(val p : DataCacheConfig) extends Component{ stagePipe(stage0.dataColisions) } else { //Assume the writeback stage will never be unstall memory acces while memory stage is stalled - stagePipe(stage0.dataColisions) | collisionProcess(io.cpu.memory.address(lineRange.high downto wordRange.low), mask) + stagePipe(stage0.dataColisions) | collisionProcess(io.cpu.memory.address(lineRange.high downto cpuWordRange.low), mask) } } @@ -667,7 +674,7 @@ class DataCache(val p : DataCacheConfig) extends Component{ //Loader interface val loaderValid = False - + val ioMemRspMuxed = io.mem.rsp.data.subdivideIn(cpuDataWidth bits).read(io.cpu.writeBack.address(memWordToCpuWordRange)) io.cpu.writeBack.haltIt := io.cpu.writeBack.isValid @@ -717,7 +724,7 @@ class DataCache(val p : DataCacheConfig) extends Component{ import DataCacheExternalAmoStates._ val amo = withAmo generate new Area{ def rf = request.data - def mem = if(withInternalAmo) dataMux else io.mem.rsp.data + def mem = if(withInternalAmo) dataMux else ioMemRspMuxed val compare = request.amoCtrl.alu.msb val unsigned = request.amoCtrl.alu(2 downto 1) === B"11" val addSub = (rf.asSInt + Mux(compare, ~mem, mem).asSInt + Mux(compare, S(1), S(0))).asBits @@ -748,9 +755,10 @@ class DataCache(val p : DataCacheConfig) extends Component{ val cpuWriteToCache = False when(cpuWriteToCache){ dataWriteCmd.valid setWhen(request.wr && waysHit) - dataWriteCmd.address := mmuRsp.physicalAddress(lineRange.high downto wordRange.low) - dataWriteCmd.data := requestDataBypass - dataWriteCmd.mask := mask + dataWriteCmd.address := mmuRsp.physicalAddress(lineRange.high downto memWordRange.low) + dataWriteCmd.data.subdivideIn(cpuDataWidth bits).foreach(_ := requestDataBypass) + dataWriteCmd.mask := 0 + dataWriteCmd.mask.subdivideIn(cpuDataWidth/8 bits).write(io.cpu.writeBack.address(memWordToCpuWordRange), mask) dataWriteCmd.way := waysHits } @@ -761,7 +769,7 @@ class DataCache(val p : DataCacheConfig) extends Component{ io.cpu.writeBack.isWrite := request.wr io.mem.cmd.valid := False - io.mem.cmd.address := mmuRsp.physicalAddress(tagRange.high downto wordRange.low) @@ U(0, wordRange.low bit) + io.mem.cmd.address := mmuRsp.physicalAddress(tagRange.high downto cpuWordRange.low) @@ U(0, cpuWordRange.low bits) io.mem.cmd.length := 0 io.mem.cmd.last := True io.mem.cmd.wr := request.wr @@ -825,7 +833,7 @@ class DataCache(val p : DataCacheConfig) extends Component{ //Write through io.mem.cmd.valid setWhen(request.wr) - io.mem.cmd.address := mmuRsp.physicalAddress(tagRange.high downto wordRange.low) @@ U(0, wordRange.low bit) + io.mem.cmd.address := mmuRsp.physicalAddress(tagRange.high downto cpuWordRange.low) @@ U(0, cpuWordRange.low bits) io.mem.cmd.length := 0 io.cpu.writeBack.haltIt clearWhen(!request.wr || io.mem.cmd.ready) @@ -861,10 +869,10 @@ class DataCache(val p : DataCacheConfig) extends Component{ } when(bypassCache){ - io.cpu.writeBack.data := io.mem.rsp.data + io.cpu.writeBack.data := ioMemRspMuxed if(catchAccessError) io.cpu.writeBack.accessError := io.mem.rsp.valid && io.mem.rsp.error } otherwise { - io.cpu.writeBack.data := dataMux + io.cpu.writeBack.data := dataMux if(catchAccessError) io.cpu.writeBack.accessError := (waysHits & B(tagsReadRsp.map(_.error))) =/= 0 } diff --git a/src/test/cpp/regression/main.cpp b/src/test/cpp/regression/main.cpp index 5fa9635..e0f50ab 100644 --- a/src/test/cpp/regression/main.cpp +++ b/src/test/cpp/regression/main.cpp @@ -2028,7 +2028,7 @@ public: #endif error = false; for(int idx = 0;idx < IBUS_DATA_WIDTH/32;idx++){ - bool localError; + bool localError = false; ws->iBusAccess(address+idx*4,((uint32_t*)&top->iBus_rsp_payload_data)+idx,&localError); error |= localError; } @@ -2342,7 +2342,7 @@ public: #include struct DBusCachedTask{ - uint32_t data; + char data[DBUS_DATA_WIDTH/8]; bool error; bool last; bool exclusive; @@ -2386,21 +2386,43 @@ public: bool error; ws->dBusAccess(top->dBus_cmd_payload_address,1,2,top->dBus_cmd_payload_mask,&top->dBus_cmd_payload_data,&error); #else - bool cancel = false; + bool cancel = false, error = false; if(top->dBus_cmd_payload_exclusive){ bool hit = reservationValid && reservationAddress == top->dBus_cmd_payload_address; rsp.exclusive = hit; cancel = !hit; reservationValid = false; } - if(!cancel) ws->dBusAccess(top->dBus_cmd_payload_address,1,2,top->dBus_cmd_payload_mask,&top->dBus_cmd_payload_data,&rsp.error); + if(!cancel) { + for(int idx = 0;idx < 1;idx++){ + bool localError = false; + ws->dBusAccess(top->dBus_cmd_payload_address+idx*4,1,2,top->dBus_cmd_payload_mask >> idx*4,((uint32_t*)&top->dBus_cmd_payload_data)+idx, &localError); + error |= localError; + + //printf("%d ", (int)localError); + } + } + + // printf("%x %d\n", top->dBus_cmd_payload_address, (int)error); rsp.last = true; + rsp.error = error; rsps.push(rsp); #endif } else { - for(int beat = 0;beat <= top->dBus_cmd_payload_length;beat++){ - ws->dBusAccess(top->dBus_cmd_payload_address + beat * 4,0,2,0,&rsp.data,&rsp.error); - rsp.last = beat == top->dBus_cmd_payload_length; + bool error = false; + uint32_t beatCount = top->dBus_cmd_payload_length*32/DBUS_DATA_WIDTH; + for(int beat = 0;beat <= beatCount;beat++){ + if(top->dBus_cmd_payload_length == 0){ + uint32_t sel = (top->dBus_cmd_payload_address >> 2) & (DBUS_DATA_WIDTH/32-1); + ws->dBusAccess(top->dBus_cmd_payload_address,0,2,0,((uint32_t*)rsp.data) + sel,&error); + } else { + for(int idx = 0;idx < DBUS_DATA_WIDTH/32;idx++){ + bool localError = false; + ws->dBusAccess(top->dBus_cmd_payload_address + beat * DBUS_DATA_WIDTH/8 + idx*4,0,2,0,((uint32_t*)rsp.data)+idx, &localError); + error |= localError; + } + } + rsp.last = beat == beatCount; #ifdef DBUS_EXCLUSIVE if(top->dBus_cmd_payload_exclusive){ rsp.exclusive = true; @@ -2408,6 +2430,7 @@ public: reservationAddress = top->dBus_cmd_payload_address; } #endif + rsp.error = error; rsps.push(rsp); } @@ -2434,14 +2457,18 @@ public: rsps.pop(); top->dBus_rsp_valid = 1; top->dBus_rsp_payload_error = rsp.error; - top->dBus_rsp_payload_data = rsp.data; + for(int idx = 0;idx < DBUS_DATA_WIDTH/32;idx++){ + ((uint32_t*)&top->dBus_rsp_payload_data)[idx] = ((uint32_t*)rsp.data)[idx]; + } top->dBus_rsp_payload_last = rsp.last; #ifdef DBUS_EXCLUSIVE top->dBus_rsp_payload_exclusive = rsp.exclusive; #endif } else{ top->dBus_rsp_valid = 0; - top->dBus_rsp_payload_data = VL_RANDOM_I(32); + for(int idx = 0;idx < DBUS_DATA_WIDTH/32;idx++){ + ((uint32_t*)&top->dBus_rsp_payload_data)[idx] = VL_RANDOM_I(32); + } top->dBus_rsp_payload_error = VL_RANDOM_I(1); top->dBus_rsp_payload_last = VL_RANDOM_I(1); #ifdef DBUS_EXCLUSIVE diff --git a/src/test/cpp/regression/makefile b/src/test/cpp/regression/makefile index 9836326..da525c5 100644 --- a/src/test/cpp/regression/makefile +++ b/src/test/cpp/regression/makefile @@ -5,6 +5,7 @@ IBUS?=CACHED IBUS_TC?=no IBUS_DATA_WIDTH?=32 DBUS?=CACHED +DBUS_DATA_WIDTH?=32 TRACE?=no TRACE_ACCESS?=no TRACE_START=0 @@ -46,6 +47,7 @@ WITH_USER_IO?=no ADDCFLAGS += -CFLAGS -DREGRESSION_PATH='\"$(REGRESSION_PATH)/\"' ADDCFLAGS += -CFLAGS -DIBUS_${IBUS} ADDCFLAGS += -CFLAGS -DIBUS_DATA_WIDTH=${IBUS_DATA_WIDTH} +ADDCFLAGS += -CFLAGS -DDBUS_DATA_WIDTH=${DBUS_DATA_WIDTH} ADDCFLAGS += -CFLAGS -DDBUS_${DBUS} ADDCFLAGS += -CFLAGS -DREDO=${REDO} diff --git a/src/test/scala/vexriscv/TestIndividualFeatures.scala b/src/test/scala/vexriscv/TestIndividualFeatures.scala index 72651b4..8a7ace1 100644 --- a/src/test/scala/vexriscv/TestIndividualFeatures.scala +++ b/src/test/scala/vexriscv/TestIndividualFeatures.scala @@ -1,7 +1,7 @@ package vexriscv import java.io.{File, OutputStream} -import java.util.concurrent.TimeUnit +import java.util.concurrent.{ForkJoinPool, TimeUnit} import org.apache.commons.io.FileUtils import org.scalatest.{BeforeAndAfterAll, FunSuite, ParallelTestExecution, Tag, Transformer} @@ -426,7 +426,8 @@ class DBusDimension extends VexRiscvDimension("DBus") { // override def isCompatibleWith(positions: Seq[ConfigPosition[VexRiscvConfig]]) = catchAll == positions.exists(_.isInstanceOf[CatchAllPosition]) } } else { - val bytePerLine = List(8,16,32,64)(r.nextInt(4)) + val memDataWidth = List(32,64,128)(r.nextInt(3)) + val bytePerLine = Math.max(memDataWidth/8, List(8,16,32,64)(r.nextInt(4))) var cacheSize = 0 var wayCount = 0 val withLrSc = catchAll @@ -441,8 +442,8 @@ class DBusDimension extends VexRiscvDimension("DBus") { cacheSize = 512 << r.nextInt(5) wayCount = 1 << r.nextInt(3) }while(cacheSize/wayCount < 512 || (catchAll && cacheSize/wayCount > 4096)) - new VexRiscvPosition("Cached" + "S" + cacheSize + "W" + wayCount + "BPL" + bytePerLine + (if(dBusCmdMasterPipe) "Cmp " else "") + (if(dBusCmdSlavePipe) "Csp " else "") + (if(dBusRspSlavePipe) "Rsp " else "") + (if(relaxedMemoryTranslationRegister) "Rmtr " else "") + (if(earlyWaysHits) "Ewh " else "") + (if(withAmo) "Amo " else "") + (if(withSmp) "Smp " else "")) { - override def testParam = "DBUS=CACHED " + (if(withLrSc) "LRSC=yes " else "") + (if(withAmo) "AMO=yes " else "") + (if(withSmp) "DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes " else "") + new VexRiscvPosition(s"Cached${memDataWidth}d" + "S" + cacheSize + "W" + wayCount + "BPL" + bytePerLine + (if(dBusCmdMasterPipe) "Cmp " else "") + (if(dBusCmdSlavePipe) "Csp " else "") + (if(dBusRspSlavePipe) "Rsp " else "") + (if(relaxedMemoryTranslationRegister) "Rmtr " else "") + (if(earlyWaysHits) "Ewh " else "") + (if(withAmo) "Amo " else "") + (if(withSmp) "Smp " else "")) { + override def testParam = s"DBUS=CACHED DBUS_DATA_WIDTH=$memDataWidth " + (if(withLrSc) "LRSC=yes " else "") + (if(withAmo) "AMO=yes " else "") + (if(withSmp) "DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes " else "") override def applyOn(config: VexRiscvConfig): Unit = { config.plugins += new DBusCachedPlugin( @@ -452,7 +453,7 @@ class DBusDimension extends VexRiscvDimension("DBus") { wayCount = wayCount, addressWidth = 32, cpuDataWidth = 32, - memDataWidth = 32, + memDataWidth = memDataWidth, catchAccessError = catchAll, catchIllegal = catchAll, catchUnaligned = catchAll, @@ -574,8 +575,14 @@ object PlayFuture extends App{ Thread.sleep(8000) } -class MultithreadedFunSuite extends FunSuite { - implicit val ec = ExecutionContext.global +class MultithreadedFunSuite(threadCount : Int) extends FunSuite { + val finalThreadCount = if(threadCount > 0) threadCount else { + val systemInfo = new oshi.SystemInfo + systemInfo.getHardware.getProcessor.getLogicalProcessorCount + } + implicit val ec = ExecutionContext.fromExecutorService( + new ForkJoinPool(finalThreadCount, ForkJoinPool.defaultForkJoinWorkerThreadFactory, null, true) + ) class Job(body : => Unit){ val originalOutput = Console.out val buffer = mutable.Queue[Char]() @@ -612,7 +619,7 @@ class MultithreadedFunSuite extends FunSuite { } -class FunTestPara extends MultithreadedFunSuite{ +class FunTestPara extends MultithreadedFunSuite(3){ def createTest(name : String): Unit ={ test(name){ for(i <- 0 to 4) { @@ -624,20 +631,20 @@ class FunTestPara extends MultithreadedFunSuite{ (0 to 80).map(_.toString).foreach(createTest) } -class FunTestPlay extends FunSuite { - def createTest(name : String): Unit ={ - test(name){ - Thread.sleep(500) - for(i <- 0 to 4) { - println(s"$name $i") - Thread.sleep(500) - } - } - } - (0 to 80).map(_.toString).foreach(createTest) -} +//class FunTestPlay extends FunSuite { +// def createTest(name : String): Unit ={ +// test(name){ +// Thread.sleep(500) +// for(i <- 0 to 4) { +// println(s"$name $i") +// Thread.sleep(500) +// } +// } +// } +// (0 to 80).map(_.toString).foreach(createTest) +//} -class TestIndividualFeatures extends MultithreadedFunSuite { +class TestIndividualFeatures extends MultithreadedFunSuite(sys.env.getOrElse("VEXRISCV_REGRESSION_THREAD_COUNT", "0").toInt) { val testCount = sys.env.getOrElse("VEXRISCV_REGRESSION_CONFIG_COUNT", "100").toInt val seed = sys.env.getOrElse("VEXRISCV_REGRESSION_SEED", Random.nextLong().toString).toLong val testId : Set[Int] = sys.env.get("VEXRISCV_REGRESSION_TEST_ID") match {