diff --git a/src/main/scala/vexriscv/ip/InstructionCache.scala b/src/main/scala/vexriscv/ip/InstructionCache.scala
index 43f5130..dc97444 100644
--- a/src/main/scala/vexriscv/ip/InstructionCache.scala
+++ b/src/main/scala/vexriscv/ip/InstructionCache.scala
@@ -24,7 +24,8 @@ case class InstructionCacheConfig( cacheSize : Int,
                                    twoCycleRam : Boolean = false,
                                    twoCycleRamInnerMux : Boolean = false,
                                    preResetFlush : Boolean = false,
-                                   bypassGen : Boolean = false ){
+                                   bypassGen : Boolean = false,
+                                   reducedBankWidth : Boolean = false){
 
   assert(!(twoCycleRam && !twoCycleCache))
 
@@ -286,23 +287,13 @@ class InstructionCache(p : InstructionCacheConfig, mmuParameter : MemoryTranslat
 
   val lineWidth = bytePerLine*8
   val lineCount = cacheSize/bytePerLine
-  val wordWidth = cpuDataWidth
-  val wordWidthLog2 = log2Up(wordWidth)
-  val wordPerLine = lineWidth/wordWidth
+  val cpuWordWidth = cpuDataWidth
   val memWordPerLine = lineWidth/memDataWidth
-  val bytePerWord = wordWidth/8
-  val bytePerMemWord = memDataWidth/8
+  val bytePerCpuWord = cpuWordWidth/8
   val wayLineCount = lineCount/wayCount
-  val wayLineLog2 = log2Up(wayLineCount)
-  val wayMemWordCount = wayLineCount * memWordPerLine
 
   val tagRange = addressWidth-1 downto log2Up(wayLineCount*bytePerLine)
   val lineRange = tagRange.low-1 downto log2Up(bytePerLine)
-  val wordRange = log2Up(bytePerLine)-1 downto log2Up(bytePerWord)
-  val memWordRange = log2Up(bytePerLine)-1 downto log2Up(bytePerMemWord)
-  val memWordToCpuWordRange = log2Up(bytePerMemWord)-1 downto log2Up(bytePerWord)
-  val tagLineRange = tagRange.high downto lineRange.low
-  val lineWordRange = lineRange.high downto wordRange.low
 
   case class LineTag() extends Bundle{
     val valid = Bool
@@ -310,10 +301,17 @@ class InstructionCache(p : InstructionCacheConfig, mmuParameter : MemoryTranslat
     val address = UInt(tagRange.length bit)
   }
 
+  val bankCount = wayCount
+  val bankWidth = if(!reducedBankWidth) memDataWidth else Math.max(cpuDataWidth, memDataWidth/wayCount)
+  val bankByteSize = cacheSize/bankCount
+  val bankWordCount = bankByteSize*8/bankWidth
+  val bankWordToCpuWordRange = log2Up(bankWidth/8)-1 downto log2Up(bytePerCpuWord)
+  val memToBankRatio = bankWidth*bankCount / memDataWidth
+
+  val banks = Seq.fill(bankCount)(Mem(Bits(bankWidth bits), bankWordCount))
 
   val ways = Seq.fill(wayCount)(new Area{
     val tags = Mem(LineTag(),wayLineCount)
-    val datas = Mem(Bits(memDataWidth bits),wayMemWordCount)
 
     if(preResetFlush){
       tags.initBigInt(List.fill(wayLineCount)(BigInt(0)))
@@ -367,7 +365,7 @@ class InstructionCache(p : InstructionCacheConfig, mmuParameter : MemoryTranslat
 
     val write = new Area{
       val tag = ways.map(_.tags.writePort)
-      val data = ways.map(_.datas.writePort)
+      val data = banks.map(_.writePort)
     }
 
     for(wayId <- 0 until wayCount){
@@ -378,13 +376,24 @@ class InstructionCache(p : InstructionCacheConfig, mmuParameter : MemoryTranslat
       tag.data.valid := flushCounter.msb
       tag.data.error := hadError || io.mem.rsp.error
       tag.data.address := address(tagRange)
-
-      val data = write.data(wayId)
-      data.valid   := io.mem.rsp.valid && wayHit
-      data.address := address(lineRange) @@ wordIndex
-      data.data    := io.mem.rsp.data
     }
 
+    for((writeBank, bankId) <- write.data.zipWithIndex){
+      if(!reducedBankWidth) {
+        writeBank.valid := io.mem.rsp.valid && wayToAllocate === bankId
+        writeBank.address := address(lineRange) @@ wordIndex
+        writeBank.data := io.mem.rsp.data
+      } else {
+        val sel = U(bankId) - wayToAllocate.value
+        val groupSel = wayToAllocate(log2Up(bankCount)-1 downto log2Up(bankCount/memToBankRatio))
+        val subSel = sel(log2Up(bankCount/memToBankRatio) -1 downto 0)
+        writeBank.valid := io.mem.rsp.valid && groupSel === (bankId >> log2Up(bankCount/memToBankRatio))
+        writeBank.address := address(lineRange) @@ wordIndex @@ (subSel)
+        writeBank.data := io.mem.rsp.data.subdivideIn(bankCount/memToBankRatio slices)(subSel)
+      }
+    }
+
+
     when(io.mem.rsp.valid) {
       wordIndex := (wordIndex + 1).resized
       hadError.setWhen(io.mem.rsp.error)
@@ -394,17 +403,20 @@ class InstructionCache(p : InstructionCacheConfig, mmuParameter : MemoryTranslat
     }
   }
 
-
   val fetchStage = new Area{
     val read = new Area{
-      val waysValues = for(way <- ways) yield new Area{
+      val banksValue = for(bank <- banks) yield new Area{
+        val dataMem = bank.readSync(io.cpu.prefetch.pc(lineRange.high downto log2Up(bankWidth/8)), !io.cpu.fetch.isStuck)
+        val data = if(!twoCycleRamInnerMux) dataMem.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(bankWordToCpuWordRange)) else dataMem
+      }
+
+      val waysValues = for((way, wayId) <- ways.zipWithIndex) yield new Area{
         val tag = if(asyncTagMemory) {
           way.tags.readAsync(io.cpu.fetch.pc(lineRange))
         }else {
           way.tags.readSync(io.cpu.prefetch.pc(lineRange), !io.cpu.fetch.isStuck)
         }
-        val dataMem = way.datas.readSync(io.cpu.prefetch.pc(lineRange.high downto memWordRange.low), !io.cpu.fetch.isStuck)
-        val data = if(!twoCycleRamInnerMux) dataMem.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(memWordToCpuWordRange)) else dataMem
+//        val data = CombInit(banksValue(wayId).data)
       }
     }
 
@@ -412,10 +424,11 @@ class InstructionCache(p : InstructionCacheConfig, mmuParameter : MemoryTranslat
     val hit = (!twoCycleRam) generate new Area{
       val hits = read.waysValues.map(way => way.tag.valid && way.tag.address === io.cpu.fetch.mmuRsp.physicalAddress(tagRange))
       val valid = Cat(hits).orR
-      val id = OHToUInt(hits)
-      val error = read.waysValues.map(_.tag.error).read(id)
-      val data = read.waysValues.map(_.data).read(id)
-      val word = if(cpuDataWidth == memDataWidth || !twoCycleRamInnerMux) CombInit(data) else data.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(memWordToCpuWordRange))
+      val wayId = OHToUInt(hits)
+      val bankId = if(!reducedBankWidth) wayId else (wayId >> log2Up(bankCount/memToBankRatio)) @@ ((wayId + (io.cpu.fetch.mmuRsp.physicalAddress(log2Up(bankWidth/8), log2Up(bankCount) bits))).resize(log2Up(bankCount/memToBankRatio)))
+      val error = read.waysValues.map(_.tag.error).read(wayId)
+      val data = read.banksValue.map(_.data).read(bankId)
+      val word = if(cpuDataWidth == memDataWidth || !twoCycleRamInnerMux) CombInit(data) else data.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(bankWordToCpuWordRange))
       io.cpu.fetch.data := (if(p.bypassGen) (io.cpu.fetch.dataBypassValid ? io.cpu.fetch.dataBypass | word) else word)
       if(twoCycleCache){
         io.cpu.decode.data := RegNextWhen(io.cpu.fetch.data,!io.cpu.decode.isStuck)
@@ -423,7 +436,7 @@ class InstructionCache(p : InstructionCacheConfig, mmuParameter : MemoryTranslat
     }
 
     if(twoCycleRam && wayCount == 1){
-      val cacheData = if(cpuDataWidth == memDataWidth || !twoCycleRamInnerMux) CombInit(read.waysValues.head.data) else read.waysValues.head.data.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(memWordToCpuWordRange))
+      val cacheData = if(cpuDataWidth == memDataWidth || !twoCycleRamInnerMux) CombInit(read.banksValue.head.data) else read.banksValue.head.data.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(bankWordToCpuWordRange))
       io.cpu.fetch.data := (if(p.bypassGen) (io.cpu.fetch.dataBypassValid ? io.cpu.fetch.dataBypass | cacheData) else cacheData)
     }
 
@@ -452,10 +465,11 @@ class InstructionCache(p : InstructionCacheConfig, mmuParameter : MemoryTranslat
       val tags = fetchStage.read.waysValues.map(way => stage(way.tag))
       val hits = tags.map(tag => tag.valid && tag.address === mmuRsp.physicalAddress(tagRange))
       val valid = Cat(hits).orR
-      val id = OHToUInt(hits)
-      val error = tags(id).error
-      val data = fetchStage.read.waysValues.map(way => stage(way.data)).read(id)
-      val word = if(cpuDataWidth == memDataWidth || !twoCycleRamInnerMux) data else data.subdivideIn(cpuDataWidth bits).read(io.cpu.decode.pc(memWordToCpuWordRange))
+      val wayId = OHToUInt(hits)
+      val bankId = if(!reducedBankWidth) wayId else (wayId >> log2Up(bankCount/memToBankRatio)) @@ ((wayId + (mmuRsp.physicalAddress(log2Up(bankWidth/8), log2Up(bankCount) bits))).resize(log2Up(bankCount/memToBankRatio)))
+      val error = tags(wayId).error
+      val data = fetchStage.read.banksValue.map(bank => stage(bank.data)).read(bankId)
+      val word = if(cpuDataWidth == memDataWidth || !twoCycleRamInnerMux) data else data.subdivideIn(cpuDataWidth bits).read(io.cpu.decode.pc(bankWordToCpuWordRange))
       if(p.bypassGen) when(stage(io.cpu.fetch.dataBypassValid)){
         word := stage(io.cpu.fetch.dataBypass)
       }
diff --git a/src/test/scala/vexriscv/TestIndividualFeatures.scala b/src/test/scala/vexriscv/TestIndividualFeatures.scala
index 66b308b..f3f5b28 100644
--- a/src/test/scala/vexriscv/TestIndividualFeatures.scala
+++ b/src/test/scala/vexriscv/TestIndividualFeatures.scala
@@ -356,6 +356,7 @@ class IBusDimension(rvcRate : Double) extends VexRiscvDimension("IBus") {
       val catchAll = universes.contains(VexRiscvUniverse.CATCH_ALL)
       val compressed = r.nextDouble() < rvcRate
       val tighlyCoupled = r.nextBoolean() && !catchAll
+      val reducedBankWidth = r.nextBoolean()
 //      val tighlyCoupled = false
       val prediction = random(r, List(NONE, STATIC, DYNAMIC, DYNAMIC_TARGET))
       val relaxedPcCalculation, twoCycleCache, injectorStage = r.nextBoolean()
@@ -392,7 +393,8 @@ class IBusDimension(rvcRate : Double) extends VexRiscvDimension("IBus") {
               asyncTagMemory = false,
               twoCycleRam = twoCycleRam,
               twoCycleCache = twoCycleCache,
-              twoCycleRamInnerMux = twoCycleRamInnerMux
+              twoCycleRamInnerMux = twoCycleRamInnerMux,
+              reducedBankWidth = reducedBankWidth
             )
           )
           if(tighlyCoupled) p.newTightlyCoupledPort(TightlyCoupledPortParameter("iBusTc", a => a(30 downto 28) === 0x0))