Add i$ reduceBankWidth to take advantage of multi way by remaping the data location to reduce on chip ram data width

2020-05-12 23:58:28 +02:00 · 2020-05-12 23:58:28 +02:00 · 685c914227
parent 0471c7ad76
commit 685c914227
2 changed files with 50 additions and 34 deletions
--- a/src/main/scala/vexriscv/ip/InstructionCache.scala
+++ b/src/main/scala/vexriscv/ip/InstructionCache.scala
@ -24,7 +24,8 @@ case class InstructionCacheConfig( cacheSize : Int,
                                   twoCycleRam : Boolean = false,
                                   twoCycleRamInnerMux : Boolean = false,
                                   preResetFlush : Boolean = false,
-                                   bypassGen : Boolean = false ){
+                                   bypassGen : Boolean = false,
                                   reducedBankWidth : Boolean = false){
  assert(!(twoCycleRam && !twoCycleCache))
@ -286,23 +287,13 @@ class InstructionCache(p : InstructionCacheConfig, mmuParameter : MemoryTranslat
  val lineWidth = bytePerLine*8
  val lineCount = cacheSize/bytePerLine
-  val wordWidth = cpuDataWidth
+  val cpuWordWidth = cpuDataWidth
  val wordWidthLog2 = log2Up(wordWidth)
  val wordPerLine = lineWidth/wordWidth
  val memWordPerLine = lineWidth/memDataWidth
-  val bytePerWord = wordWidth/8
+  val bytePerCpuWord = cpuWordWidth/8
  val bytePerMemWord = memDataWidth/8
  val wayLineCount = lineCount/wayCount
  val wayLineLog2 = log2Up(wayLineCount)
  val wayMemWordCount = wayLineCount * memWordPerLine
  val tagRange = addressWidth-1 downto log2Up(wayLineCount*bytePerLine)
  val lineRange = tagRange.low-1 downto log2Up(bytePerLine)
  val wordRange = log2Up(bytePerLine)-1 downto log2Up(bytePerWord)
  val memWordRange = log2Up(bytePerLine)-1 downto log2Up(bytePerMemWord)
  val memWordToCpuWordRange = log2Up(bytePerMemWord)-1 downto log2Up(bytePerWord)
  val tagLineRange = tagRange.high downto lineRange.low
  val lineWordRange = lineRange.high downto wordRange.low
  case class LineTag() extends Bundle{
    val valid = Bool
@ -310,10 +301,17 @@ class InstructionCache(p : InstructionCacheConfig, mmuParameter : MemoryTranslat
    val address = UInt(tagRange.length bit)
  }
  val bankCount = wayCount
  val bankWidth = if(!reducedBankWidth) memDataWidth else Math.max(cpuDataWidth, memDataWidth/wayCount)
  val bankByteSize = cacheSize/bankCount
  val bankWordCount = bankByteSize*8/bankWidth
  val bankWordToCpuWordRange = log2Up(bankWidth/8)-1 downto log2Up(bytePerCpuWord)
  val memToBankRatio = bankWidth*bankCount / memDataWidth
  val banks = Seq.fill(bankCount)(Mem(Bits(bankWidth bits), bankWordCount))
  val ways = Seq.fill(wayCount)(new Area{
    val tags = Mem(LineTag(),wayLineCount)
    val datas = Mem(Bits(memDataWidth bits),wayMemWordCount)
    if(preResetFlush){
      tags.initBigInt(List.fill(wayLineCount)(BigInt(0)))
@ -367,7 +365,7 @@ class InstructionCache(p : InstructionCacheConfig, mmuParameter : MemoryTranslat
    val write = new Area{
      val tag = ways.map(_.tags.writePort)
-      val data = ways.map(_.datas.writePort)
+      val data = banks.map(_.writePort)
    }
    for(wayId <- 0 until wayCount){
@ -378,13 +376,24 @@ class InstructionCache(p : InstructionCacheConfig, mmuParameter : MemoryTranslat
      tag.data.valid := flushCounter.msb
      tag.data.error := hadError || io.mem.rsp.error
      tag.data.address := address(tagRange)
      val data = write.data(wayId)
      data.valid   := io.mem.rsp.valid && wayHit
      data.address := address(lineRange) @@ wordIndex
      data.data    := io.mem.rsp.data
    }
    for((writeBank, bankId) <- write.data.zipWithIndex){
      if(!reducedBankWidth) {
        writeBank.valid := io.mem.rsp.valid && wayToAllocate === bankId
        writeBank.address := address(lineRange) @@ wordIndex
        writeBank.data := io.mem.rsp.data
      } else {
        val sel = U(bankId) - wayToAllocate.value
        val groupSel = wayToAllocate(log2Up(bankCount)-1 downto log2Up(bankCount/memToBankRatio))
        val subSel = sel(log2Up(bankCount/memToBankRatio) -1 downto 0)
        writeBank.valid := io.mem.rsp.valid && groupSel === (bankId >> log2Up(bankCount/memToBankRatio))
        writeBank.address := address(lineRange) @@ wordIndex @@ (subSel)
        writeBank.data := io.mem.rsp.data.subdivideIn(bankCount/memToBankRatio slices)(subSel)
      }
    }
    when(io.mem.rsp.valid) {
      wordIndex := (wordIndex + 1).resized
      hadError.setWhen(io.mem.rsp.error)
@ -394,17 +403,20 @@ class InstructionCache(p : InstructionCacheConfig, mmuParameter : MemoryTranslat
    }
  }
  val fetchStage = new Area{
    val read = new Area{
-      val waysValues = for(way <- ways) yield new Area{
+      val banksValue = for(bank <- banks) yield new Area{
        val dataMem = bank.readSync(io.cpu.prefetch.pc(lineRange.high downto log2Up(bankWidth/8)), !io.cpu.fetch.isStuck)
        val data = if(!twoCycleRamInnerMux) dataMem.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(bankWordToCpuWordRange)) else dataMem
      }
      val waysValues = for((way, wayId) <- ways.zipWithIndex) yield new Area{
        val tag = if(asyncTagMemory) {
          way.tags.readAsync(io.cpu.fetch.pc(lineRange))
        }else {
          way.tags.readSync(io.cpu.prefetch.pc(lineRange), !io.cpu.fetch.isStuck)
        }
-        val dataMem = way.datas.readSync(io.cpu.prefetch.pc(lineRange.high downto memWordRange.low), !io.cpu.fetch.isStuck)
+//        val data = CombInit(banksValue(wayId).data)
        val data = if(!twoCycleRamInnerMux) dataMem.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(memWordToCpuWordRange)) else dataMem
      }
    }
@ -412,10 +424,11 @@ class InstructionCache(p : InstructionCacheConfig, mmuParameter : MemoryTranslat
    val hit = (!twoCycleRam) generate new Area{
      val hits = read.waysValues.map(way => way.tag.valid && way.tag.address === io.cpu.fetch.mmuRsp.physicalAddress(tagRange))
      val valid = Cat(hits).orR
-      val id = OHToUInt(hits)
+      val wayId = OHToUInt(hits)
-      val error = read.waysValues.map(_.tag.error).read(id)
+      val bankId = if(!reducedBankWidth) wayId else (wayId >> log2Up(bankCount/memToBankRatio)) @@ ((wayId + (io.cpu.fetch.mmuRsp.physicalAddress(log2Up(bankWidth/8), log2Up(bankCount) bits))).resize(log2Up(bankCount/memToBankRatio)))
-      val data = read.waysValues.map(_.data).read(id)
+      val error = read.waysValues.map(_.tag.error).read(wayId)
-      val word = if(cpuDataWidth == memDataWidth || !twoCycleRamInnerMux) CombInit(data) else data.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(memWordToCpuWordRange))
+      val data = read.banksValue.map(_.data).read(bankId)
      val word = if(cpuDataWidth == memDataWidth || !twoCycleRamInnerMux) CombInit(data) else data.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(bankWordToCpuWordRange))
      io.cpu.fetch.data := (if(p.bypassGen) (io.cpu.fetch.dataBypassValid ? io.cpu.fetch.dataBypass | word) else word)
      if(twoCycleCache){
        io.cpu.decode.data := RegNextWhen(io.cpu.fetch.data,!io.cpu.decode.isStuck)
@ -423,7 +436,7 @@ class InstructionCache(p : InstructionCacheConfig, mmuParameter : MemoryTranslat
    }
    if(twoCycleRam && wayCount == 1){
-      val cacheData = if(cpuDataWidth == memDataWidth || !twoCycleRamInnerMux) CombInit(read.waysValues.head.data) else read.waysValues.head.data.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(memWordToCpuWordRange))
+      val cacheData = if(cpuDataWidth == memDataWidth || !twoCycleRamInnerMux) CombInit(read.banksValue.head.data) else read.banksValue.head.data.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(bankWordToCpuWordRange))
      io.cpu.fetch.data := (if(p.bypassGen) (io.cpu.fetch.dataBypassValid ? io.cpu.fetch.dataBypass | cacheData) else cacheData)
    }
@ -452,10 +465,11 @@ class InstructionCache(p : InstructionCacheConfig, mmuParameter : MemoryTranslat
      val tags = fetchStage.read.waysValues.map(way => stage(way.tag))
      val hits = tags.map(tag => tag.valid && tag.address === mmuRsp.physicalAddress(tagRange))
      val valid = Cat(hits).orR
-      val id = OHToUInt(hits)
+      val wayId = OHToUInt(hits)
-      val error = tags(id).error
+      val bankId = if(!reducedBankWidth) wayId else (wayId >> log2Up(bankCount/memToBankRatio)) @@ ((wayId + (mmuRsp.physicalAddress(log2Up(bankWidth/8), log2Up(bankCount) bits))).resize(log2Up(bankCount/memToBankRatio)))
-      val data = fetchStage.read.waysValues.map(way => stage(way.data)).read(id)
+      val error = tags(wayId).error
-      val word = if(cpuDataWidth == memDataWidth || !twoCycleRamInnerMux) data else data.subdivideIn(cpuDataWidth bits).read(io.cpu.decode.pc(memWordToCpuWordRange))
+      val data = fetchStage.read.banksValue.map(bank => stage(bank.data)).read(bankId)
      val word = if(cpuDataWidth == memDataWidth || !twoCycleRamInnerMux) data else data.subdivideIn(cpuDataWidth bits).read(io.cpu.decode.pc(bankWordToCpuWordRange))
      if(p.bypassGen) when(stage(io.cpu.fetch.dataBypassValid)){
        word := stage(io.cpu.fetch.dataBypass)
      }
--- a/src/test/scala/vexriscv/TestIndividualFeatures.scala
+++ b/src/test/scala/vexriscv/TestIndividualFeatures.scala
@ -356,6 +356,7 @@ class IBusDimension(rvcRate : Double) extends VexRiscvDimension("IBus") {
      val catchAll = universes.contains(VexRiscvUniverse.CATCH_ALL)
      val compressed = r.nextDouble() < rvcRate
      val tighlyCoupled = r.nextBoolean() && !catchAll
      val reducedBankWidth = r.nextBoolean()
 //      val tighlyCoupled = false
      val prediction = random(r, List(NONE, STATIC, DYNAMIC, DYNAMIC_TARGET))
      val relaxedPcCalculation, twoCycleCache, injectorStage = r.nextBoolean()
@ -392,7 +393,8 @@ class IBusDimension(rvcRate : Double) extends VexRiscvDimension("IBus") {
              asyncTagMemory = false,
              twoCycleRam = twoCycleRam,
              twoCycleCache = twoCycleCache,
-              twoCycleRamInnerMux = twoCycleRamInnerMux
+              twoCycleRamInnerMux = twoCycleRamInnerMux,
              reducedBankWidth = reducedBankWidth
            )
          )
          if(tighlyCoupled) p.newTightlyCoupledPort(TightlyCoupledPortParameter("iBusTc", a => a(30 downto 28) === 0x0))