D$ now supports memDataWidth > 32

This commit is contained in:
Dolu1990 2020-05-04 12:54:16 +02:00
parent 93b386e16e
commit b0f7f37ac8
5 changed files with 104 additions and 60 deletions

View File

@ -27,7 +27,7 @@ import spinal.lib.bus.avalon.AvalonMM
import spinal.lib.eda.altera.{InterruptReceiverTag, ResetEmitterTag}
//make clean all SEED=42 MMU=no STOP_ON_ERROR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes SUPERVISOR=yes REDO=0 DHRYSTONE=no LRSC=yes AMO=yes LINUX_REGRESSION=yes TRACE=yes TRACE_START=9546629800l FLOW_INFO=ye
// make clean all SEED=42 MMU=no STOP_ON_ERROR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes SUPERVISOR=yes REDO=1 DHRYSTONE=yes LRSC=yes AMO=yes LINUX_REGRESSION=yes TRACE=yes TRACE_START=1000000000 FLOW_INFO=ye IBUS_DATA_WIDTH=128 DBUS_DATA_WIDTH=128
object TestsWorkspace {
def main(args: Array[String]) {
def configFull = {
@ -60,7 +60,7 @@ object TestsWorkspace {
injectorStage = false,
config = InstructionCacheConfig(
cacheSize = 4096*1,
bytePerLine = 32,
bytePerLine = 64,
wayCount = 1,
addressWidth = 32,
cpuDataWidth = 32,
@ -92,11 +92,11 @@ object TestsWorkspace {
dBusRspSlavePipe = true,
config = new DataCacheConfig(
cacheSize = 4096*1,
bytePerLine = 32,
bytePerLine = 64,
wayCount = 1,
addressWidth = 32,
cpuDataWidth = 32,
memDataWidth = 32,
memDataWidth = 128,
catchAccessError = true,
catchIllegal = true,
catchUnaligned = true,

View File

@ -34,7 +34,7 @@ case class DataCacheConfig(cacheSize : Int,
assert(isPow2(pendingMax))
def withWriteResponse = withExclusive
def burstSize = bytePerLine*8/memDataWidth
val burstLength = bytePerLine/(memDataWidth/8)
// Number of cpuDataWidth-wide words in one cache line (bytePerLine divided by the CPU word size in bytes).
// It does not depend on memDataWidth; it sizes the DataCacheMemCmd.length field through log2Up(burstLength).
val burstLength = bytePerLine/(cpuDataWidth/8)
def catchSomething = catchUnaligned || catchIllegal || catchAccessError
def withInternalAmo = withAmo && !withExclusive
def withInternalLrSc = withLrSc && !withExclusive
@ -196,8 +196,8 @@ case class DataCacheMemCmd(p : DataCacheConfig) extends Bundle{
val wr = Bool
val uncached = Bool
val address = UInt(p.addressWidth bit)
val data = Bits(p.memDataWidth bits)
val mask = Bits(p.memDataWidth/8 bits)
val data = Bits(p.cpuDataWidth bits)
val mask = Bits(p.cpuDataWidth/8 bits)
val length = UInt(log2Up(p.burstLength) bits)
val exclusive = p.withExclusive generate Bool()
val last = Bool
@ -424,7 +424,6 @@ object DataCacheExternalAmoStates extends SpinalEnum{
//If external amo, mem rsp should stay
class DataCache(val p : DataCacheConfig) extends Component{
import p._
assert(cpuDataWidth == memDataWidth)
val io = new Bundle{
val cpu = slave(DataCacheCpuBus(p))
@ -434,19 +433,24 @@ class DataCache(val p : DataCacheConfig) extends Component{
val haltCpu = False
val lineWidth = bytePerLine*8
val lineCount = cacheSize/bytePerLine
val wordWidth = Math.max(memDataWidth,cpuDataWidth)
val wordWidth = cpuDataWidth
val wordWidthLog2 = log2Up(wordWidth)
val wordPerLine = lineWidth/wordWidth
val bytePerWord = wordWidth/8
val wayLineCount = lineCount/wayCount
val wayLineLog2 = log2Up(wayLineCount)
val wayWordCount = wayLineCount * wordPerLine
val memWordPerLine = lineWidth/memDataWidth
val memTransactionPerLine = p.bytePerLine / (p.memDataWidth/8)
val bytePerMemWord = memDataWidth/8
val wayMemWordCount = wayLineCount * memWordPerLine
val tagRange = addressWidth-1 downto log2Up(wayLineCount*bytePerLine)
val lineRange = tagRange.low-1 downto log2Up(bytePerLine)
val wordRange = log2Up(bytePerLine)-1 downto log2Up(bytePerWord)
val cpuWordRange = log2Up(bytePerLine)-1 downto log2Up(bytePerWord)
val memWordRange = log2Up(bytePerLine)-1 downto log2Up(bytePerMemWord)
val hitRange = tagRange.high downto lineRange.low
val memWordToCpuWordRange = log2Up(bytePerMemWord)-1 downto log2Up(bytePerWord)
class LineInfo() extends Bundle{
@ -464,23 +468,24 @@ class DataCache(val p : DataCacheConfig) extends Component{
val tagsWriteLastCmd = RegNext(tagsWriteCmd)
val dataReadCmd = Flow(UInt(log2Up(wayWordCount) bits))
val dataReadCmd = Flow(UInt(log2Up(wayMemWordCount) bits))
val dataWriteCmd = Flow(new Bundle{
val way = Bits(wayCount bits)
val address = UInt(log2Up(wayWordCount) bits)
val data = Bits(wordWidth bits)
val mask = Bits(wordWidth/8 bits)
val address = UInt(log2Up(wayMemWordCount) bits)
val data = Bits(memDataWidth bits)
val mask = Bits(memDataWidth/8 bits)
})
val ways = for(i <- 0 until wayCount) yield new Area{
val tags = Mem(new LineInfo(), wayLineCount)
val data = Mem(Bits(wordWidth bit), wayWordCount)
val data = Mem(Bits(memDataWidth bit), wayMemWordCount)
//Reads
val tagsReadRsp = tags.readSync(tagsReadCmd.payload, tagsReadCmd.valid && !io.cpu.memory.isStuck)
val dataReadRsp = data.readSync(dataReadCmd.payload, dataReadCmd.valid && !io.cpu.memory.isStuck)
val dataReadRspMem = data.readSync(dataReadCmd.payload, dataReadCmd.valid && !io.cpu.memory.isStuck)
val dataReadRspSel = if(mergeExecuteMemory) io.cpu.writeBack.address else io.cpu.memory.address
val dataReadRsp = dataReadRspMem.subdivideIn(cpuDataWidth bits).read(dataReadRspSel(memWordToCpuWordRange))
val tagsInvReadRsp = withInvalidate generate tags.readSync(tagsInvReadCmd.payload, tagsInvReadCmd.valid)
@ -511,13 +516,15 @@ class DataCache(val p : DataCacheConfig) extends Component{
tagsReadCmd.valid := True
dataReadCmd.valid := True
tagsReadCmd.payload := io.cpu.execute.address(lineRange)
dataReadCmd.payload := io.cpu.execute.address(lineRange.high downto wordRange.low)
dataReadCmd.payload := io.cpu.execute.address(lineRange.high downto memWordRange.low)
}
// Returns one bit per way: bit i is set when the pending dataWriteCmd is valid, targets way i, addresses the
// same data-RAM word as the read, and writes at least one byte that is also selected by readMask.
def collisionProcess(readAddress : UInt, readMask : Bits): Bits ={
val ret = Bits(wayCount bits)
// Drop the low address bits that select a cpuDataWidth word inside a memDataWidth word, so the value can be
// compared against dataWriteCmd.address.
val readAddressAligned = (readAddress >> log2Up(memDataWidth/cpuDataWidth))
// Use those same low address bits to pick the matching slice of the memDataWidth-wide write mask.
val dataWriteMaskAligned = dataWriteCmd.mask.subdivideIn(memDataWidth/cpuDataWidth slices).read(readAddress(log2Up(memDataWidth/cpuDataWidth)-1 downto 0))
for(i <- 0 until wayCount){
ret(i) := dataWriteCmd.valid && dataWriteCmd.way(i) && dataWriteCmd.address === readAddress && (readMask & dataWriteCmd.mask) =/= 0
ret(i) := dataWriteCmd.valid && dataWriteCmd.way(i) && dataWriteCmd.address === readAddressAligned && (readMask & dataWriteMaskAligned) =/= 0
}
ret
}
@ -600,7 +607,7 @@ class DataCache(val p : DataCacheConfig) extends Component{
U(1) -> B"0011",
default -> B"1111"
) |<< io.cpu.execute.address(1 downto 0)
val dataColisions = collisionProcess(io.cpu.execute.address(lineRange.high downto wordRange.low), mask)
val dataColisions = collisionProcess(io.cpu.execute.address(lineRange.high downto cpuWordRange.low), mask)
val wayInvalidate = B(0, wayCount bits) //Used if invalidate enabled
val isAmo = if(withAmo) io.cpu.execute.isAmo else False
@ -643,7 +650,7 @@ class DataCache(val p : DataCacheConfig) extends Component{
stagePipe(stage0.dataColisions)
} else {
//Assume the writeback stage will never unstall a memory access while the memory stage is stalled
stagePipe(stage0.dataColisions) | collisionProcess(io.cpu.memory.address(lineRange.high downto wordRange.low), mask)
stagePipe(stage0.dataColisions) | collisionProcess(io.cpu.memory.address(lineRange.high downto cpuWordRange.low), mask)
}
}
@ -667,7 +674,7 @@ class DataCache(val p : DataCacheConfig) extends Component{
//Loader interface
val loaderValid = False
val ioMemRspMuxed = io.mem.rsp.data.subdivideIn(cpuDataWidth bits).read(io.cpu.writeBack.address(memWordToCpuWordRange))
io.cpu.writeBack.haltIt := io.cpu.writeBack.isValid
@ -717,7 +724,7 @@ class DataCache(val p : DataCacheConfig) extends Component{
import DataCacheExternalAmoStates._
val amo = withAmo generate new Area{
def rf = request.data
def mem = if(withInternalAmo) dataMux else io.mem.rsp.data
def mem = if(withInternalAmo) dataMux else ioMemRspMuxed
val compare = request.amoCtrl.alu.msb
val unsigned = request.amoCtrl.alu(2 downto 1) === B"11"
val addSub = (rf.asSInt + Mux(compare, ~mem, mem).asSInt + Mux(compare, S(1), S(0))).asBits
@ -748,9 +755,10 @@ class DataCache(val p : DataCacheConfig) extends Component{
val cpuWriteToCache = False
when(cpuWriteToCache){
dataWriteCmd.valid setWhen(request.wr && waysHit)
dataWriteCmd.address := mmuRsp.physicalAddress(lineRange.high downto wordRange.low)
dataWriteCmd.data := requestDataBypass
dataWriteCmd.mask := mask
dataWriteCmd.address := mmuRsp.physicalAddress(lineRange.high downto memWordRange.low)
dataWriteCmd.data.subdivideIn(cpuDataWidth bits).foreach(_ := requestDataBypass)
dataWriteCmd.mask := 0
dataWriteCmd.mask.subdivideIn(cpuDataWidth/8 bits).write(io.cpu.writeBack.address(memWordToCpuWordRange), mask)
dataWriteCmd.way := waysHits
}
@ -761,7 +769,7 @@ class DataCache(val p : DataCacheConfig) extends Component{
io.cpu.writeBack.isWrite := request.wr
io.mem.cmd.valid := False
io.mem.cmd.address := mmuRsp.physicalAddress(tagRange.high downto wordRange.low) @@ U(0, wordRange.low bit)
io.mem.cmd.address := mmuRsp.physicalAddress(tagRange.high downto cpuWordRange.low) @@ U(0, cpuWordRange.low bits)
io.mem.cmd.length := 0
io.mem.cmd.last := True
io.mem.cmd.wr := request.wr
@ -825,7 +833,7 @@ class DataCache(val p : DataCacheConfig) extends Component{
//Write through
io.mem.cmd.valid setWhen(request.wr)
io.mem.cmd.address := mmuRsp.physicalAddress(tagRange.high downto wordRange.low) @@ U(0, wordRange.low bit)
io.mem.cmd.address := mmuRsp.physicalAddress(tagRange.high downto cpuWordRange.low) @@ U(0, cpuWordRange.low bits)
io.mem.cmd.length := 0
io.cpu.writeBack.haltIt clearWhen(!request.wr || io.mem.cmd.ready)
@ -861,7 +869,7 @@ class DataCache(val p : DataCacheConfig) extends Component{
}
when(bypassCache){
io.cpu.writeBack.data := io.mem.rsp.data
io.cpu.writeBack.data := ioMemRspMuxed
if(catchAccessError) io.cpu.writeBack.accessError := io.mem.rsp.valid && io.mem.rsp.error
} otherwise {
io.cpu.writeBack.data := dataMux

View File

@ -2028,7 +2028,7 @@ public:
#endif
error = false;
for(int idx = 0;idx < IBUS_DATA_WIDTH/32;idx++){
bool localError;
bool localError = false;
ws->iBusAccess(address+idx*4,((uint32_t*)&top->iBus_rsp_payload_data)+idx,&localError);
error |= localError;
}
@ -2342,7 +2342,7 @@ public:
#include <queue>
struct DBusCachedTask{
uint32_t data;
char data[DBUS_DATA_WIDTH/8];
bool error;
bool last;
bool exclusive;
@ -2386,21 +2386,43 @@ public:
bool error;
ws->dBusAccess(top->dBus_cmd_payload_address,1,2,top->dBus_cmd_payload_mask,&top->dBus_cmd_payload_data,&error);
#else
bool cancel = false;
bool cancel = false, error = false;
if(top->dBus_cmd_payload_exclusive){
bool hit = reservationValid && reservationAddress == top->dBus_cmd_payload_address;
rsp.exclusive = hit;
cancel = !hit;
reservationValid = false;
}
if(!cancel) ws->dBusAccess(top->dBus_cmd_payload_address,1,2,top->dBus_cmd_payload_mask,&top->dBus_cmd_payload_data,&rsp.error);
if(!cancel) {
for(int idx = 0;idx < 1;idx++){
bool localError = false;
ws->dBusAccess(top->dBus_cmd_payload_address+idx*4,1,2,top->dBus_cmd_payload_mask >> idx*4,((uint32_t*)&top->dBus_cmd_payload_data)+idx, &localError);
error |= localError;
//printf("%d ", (int)localError);
}
}
// printf("%x %d\n", top->dBus_cmd_payload_address, (int)error);
rsp.last = true;
rsp.error = error;
rsps.push(rsp);
#endif
} else {
for(int beat = 0;beat <= top->dBus_cmd_payload_length;beat++){
ws->dBusAccess(top->dBus_cmd_payload_address + beat * 4,0,2,0,&rsp.data,&rsp.error);
rsp.last = beat == top->dBus_cmd_payload_length;
bool error = false;
uint32_t beatCount = top->dBus_cmd_payload_length*32/DBUS_DATA_WIDTH;
for(int beat = 0;beat <= beatCount;beat++){
if(top->dBus_cmd_payload_length == 0){
uint32_t sel = (top->dBus_cmd_payload_address >> 2) & (DBUS_DATA_WIDTH/32-1);
ws->dBusAccess(top->dBus_cmd_payload_address,0,2,0,((uint32_t*)rsp.data) + sel,&error);
} else {
for(int idx = 0;idx < DBUS_DATA_WIDTH/32;idx++){
bool localError = false;
ws->dBusAccess(top->dBus_cmd_payload_address + beat * DBUS_DATA_WIDTH/8 + idx*4,0,2,0,((uint32_t*)rsp.data)+idx, &localError);
error |= localError;
}
}
rsp.last = beat == beatCount;
#ifdef DBUS_EXCLUSIVE
if(top->dBus_cmd_payload_exclusive){
rsp.exclusive = true;
@ -2408,6 +2430,7 @@ public:
reservationAddress = top->dBus_cmd_payload_address;
}
#endif
rsp.error = error;
rsps.push(rsp);
}
@ -2434,14 +2457,18 @@ public:
rsps.pop();
top->dBus_rsp_valid = 1;
top->dBus_rsp_payload_error = rsp.error;
top->dBus_rsp_payload_data = rsp.data;
for(int idx = 0;idx < DBUS_DATA_WIDTH/32;idx++){
((uint32_t*)&top->dBus_rsp_payload_data)[idx] = ((uint32_t*)rsp.data)[idx];
}
top->dBus_rsp_payload_last = rsp.last;
#ifdef DBUS_EXCLUSIVE
top->dBus_rsp_payload_exclusive = rsp.exclusive;
#endif
} else{
top->dBus_rsp_valid = 0;
top->dBus_rsp_payload_data = VL_RANDOM_I(32);
for(int idx = 0;idx < DBUS_DATA_WIDTH/32;idx++){
((uint32_t*)&top->dBus_rsp_payload_data)[idx] = VL_RANDOM_I(32);
}
top->dBus_rsp_payload_error = VL_RANDOM_I(1);
top->dBus_rsp_payload_last = VL_RANDOM_I(1);
#ifdef DBUS_EXCLUSIVE

View File

@ -5,6 +5,7 @@ IBUS?=CACHED
IBUS_TC?=no
IBUS_DATA_WIDTH?=32
DBUS?=CACHED
DBUS_DATA_WIDTH?=32
TRACE?=no
TRACE_ACCESS?=no
TRACE_START=0
@ -46,6 +47,7 @@ WITH_USER_IO?=no
ADDCFLAGS += -CFLAGS -DREGRESSION_PATH='\"$(REGRESSION_PATH)/\"'
ADDCFLAGS += -CFLAGS -DIBUS_${IBUS}
ADDCFLAGS += -CFLAGS -DIBUS_DATA_WIDTH=${IBUS_DATA_WIDTH}
ADDCFLAGS += -CFLAGS -DDBUS_DATA_WIDTH=${DBUS_DATA_WIDTH}
ADDCFLAGS += -CFLAGS -DDBUS_${DBUS}
ADDCFLAGS += -CFLAGS -DREDO=${REDO}

View File

@ -1,7 +1,7 @@
package vexriscv
import java.io.{File, OutputStream}
import java.util.concurrent.TimeUnit
import java.util.concurrent.{ForkJoinPool, TimeUnit}
import org.apache.commons.io.FileUtils
import org.scalatest.{BeforeAndAfterAll, FunSuite, ParallelTestExecution, Tag, Transformer}
@ -426,7 +426,8 @@ class DBusDimension extends VexRiscvDimension("DBus") {
// override def isCompatibleWith(positions: Seq[ConfigPosition[VexRiscvConfig]]) = catchAll == positions.exists(_.isInstanceOf[CatchAllPosition])
}
} else {
val bytePerLine = List(8,16,32,64)(r.nextInt(4))
val memDataWidth = List(32,64,128)(r.nextInt(3))
val bytePerLine = Math.max(memDataWidth/8, List(8,16,32,64)(r.nextInt(4)))
var cacheSize = 0
var wayCount = 0
val withLrSc = catchAll
@ -441,8 +442,8 @@ class DBusDimension extends VexRiscvDimension("DBus") {
cacheSize = 512 << r.nextInt(5)
wayCount = 1 << r.nextInt(3)
}while(cacheSize/wayCount < 512 || (catchAll && cacheSize/wayCount > 4096))
new VexRiscvPosition("Cached" + "S" + cacheSize + "W" + wayCount + "BPL" + bytePerLine + (if(dBusCmdMasterPipe) "Cmp " else "") + (if(dBusCmdSlavePipe) "Csp " else "") + (if(dBusRspSlavePipe) "Rsp " else "") + (if(relaxedMemoryTranslationRegister) "Rmtr " else "") + (if(earlyWaysHits) "Ewh " else "") + (if(withAmo) "Amo " else "") + (if(withSmp) "Smp " else "")) {
override def testParam = "DBUS=CACHED " + (if(withLrSc) "LRSC=yes " else "") + (if(withAmo) "AMO=yes " else "") + (if(withSmp) "DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes " else "")
new VexRiscvPosition(s"Cached${memDataWidth}d" + "S" + cacheSize + "W" + wayCount + "BPL" + bytePerLine + (if(dBusCmdMasterPipe) "Cmp " else "") + (if(dBusCmdSlavePipe) "Csp " else "") + (if(dBusRspSlavePipe) "Rsp " else "") + (if(relaxedMemoryTranslationRegister) "Rmtr " else "") + (if(earlyWaysHits) "Ewh " else "") + (if(withAmo) "Amo " else "") + (if(withSmp) "Smp " else "")) {
override def testParam = s"DBUS=CACHED DBUS_DATA_WIDTH=$memDataWidth " + (if(withLrSc) "LRSC=yes " else "") + (if(withAmo) "AMO=yes " else "") + (if(withSmp) "DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes " else "")
override def applyOn(config: VexRiscvConfig): Unit = {
config.plugins += new DBusCachedPlugin(
@ -452,7 +453,7 @@ class DBusDimension extends VexRiscvDimension("DBus") {
wayCount = wayCount,
addressWidth = 32,
cpuDataWidth = 32,
memDataWidth = 32,
memDataWidth = memDataWidth,
catchAccessError = catchAll,
catchIllegal = catchAll,
catchUnaligned = catchAll,
@ -574,8 +575,14 @@ object PlayFuture extends App{
Thread.sleep(8000)
}
class MultithreadedFunSuite extends FunSuite {
implicit val ec = ExecutionContext.global
class MultithreadedFunSuite(threadCount : Int) extends FunSuite {
val finalThreadCount = if(threadCount > 0) threadCount else {
val systemInfo = new oshi.SystemInfo
systemInfo.getHardware.getProcessor.getLogicalProcessorCount
}
implicit val ec = ExecutionContext.fromExecutorService(
new ForkJoinPool(finalThreadCount, ForkJoinPool.defaultForkJoinWorkerThreadFactory, null, true)
)
class Job(body : => Unit){
val originalOutput = Console.out
val buffer = mutable.Queue[Char]()
@ -612,7 +619,7 @@ class MultithreadedFunSuite extends FunSuite {
}
class FunTestPara extends MultithreadedFunSuite{
class FunTestPara extends MultithreadedFunSuite(3){
def createTest(name : String): Unit ={
test(name){
for(i <- 0 to 4) {
@ -624,20 +631,20 @@ class FunTestPara extends MultithreadedFunSuite{
(0 to 80).map(_.toString).foreach(createTest)
}
class FunTestPlay extends FunSuite {
def createTest(name : String): Unit ={
test(name){
Thread.sleep(500)
for(i <- 0 to 4) {
println(s"$name $i")
Thread.sleep(500)
}
}
}
(0 to 80).map(_.toString).foreach(createTest)
}
//class FunTestPlay extends FunSuite {
// def createTest(name : String): Unit ={
// test(name){
// Thread.sleep(500)
// for(i <- 0 to 4) {
// println(s"$name $i")
// Thread.sleep(500)
// }
// }
// }
// (0 to 80).map(_.toString).foreach(createTest)
//}
class TestIndividualFeatures extends MultithreadedFunSuite {
class TestIndividualFeatures extends MultithreadedFunSuite(sys.env.getOrElse("VEXRISCV_REGRESSION_THREAD_COUNT", "0").toInt) {
val testCount = sys.env.getOrElse("VEXRISCV_REGRESSION_CONFIG_COUNT", "100").toInt
val seed = sys.env.getOrElse("VEXRISCV_REGRESSION_SEED", Random.nextLong().toString).toLong
val testId : Set[Int] = sys.env.get("VEXRISCV_REGRESSION_TEST_ID") match {