From c9bbf0d12a7654b939bd764bd0c621006e12f7ab Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Sat, 4 Apr 2020 21:21:35 +0200 Subject: [PATCH 01/91] update LrSc reservation logic to match the spec --- src/main/scala/vexriscv/ip/DataCache.scala | 37 ++-- .../vexriscv/plugin/DBusCachedPlugin.scala | 1 - .../vexriscv/plugin/DBusSimplePlugin.scala | 8 +- src/test/cpp/raw/lrsc/build/lrsc.asm | 194 ++++++------------ src/test/cpp/raw/lrsc/build/lrsc.hex | 61 ++---- src/test/cpp/raw/lrsc/src/crt.S | 94 +-------- src/test/cpp/regression/main.cpp | 4 +- src/test/cpp/regression/makefile | 1 - .../vexriscv/TestIndividualFeatures.scala | 2 +- 9 files changed, 113 insertions(+), 289 deletions(-) diff --git a/src/main/scala/vexriscv/ip/DataCache.scala b/src/main/scala/vexriscv/ip/DataCache.scala index 7cbe4ba..0dfe74e 100644 --- a/src/main/scala/vexriscv/ip/DataCache.scala +++ b/src/main/scala/vexriscv/ip/DataCache.scala @@ -25,13 +25,16 @@ case class DataCacheConfig(cacheSize : Int, tagSizeShift : Int = 0, //Used to force infering ram withLrSc : Boolean = false, withAmo : Boolean = false, + withSmp : Boolean = false, mergeExecuteMemory : Boolean = false){ assert(!(mergeExecuteMemory && (earlyDataMux || earlyWaysHits))) assert(!(earlyDataMux && !earlyWaysHits)) def burstSize = bytePerLine*8/memDataWidth val burstLength = bytePerLine/(memDataWidth/8) def catchSomething = catchUnaligned || catchIllegal || catchAccessError - + def withInternalAmo = withAmo && !withSmp + def withInternalLrSc = withLrSc && !withSmp + def withExternalLrSc = withLrSc && withSmp def getAxi4SharedConfig() = Axi4Config( addressWidth = addressWidth, dataWidth = memDataWidth, @@ -133,15 +136,13 @@ case class DataCacheCpuWriteBack(p : DataCacheConfig) extends Bundle with IMaste val isWrite = Bool val data = Bits(p.cpuDataWidth bit) val address = UInt(p.addressWidth bit) - val mmuException, unalignedAccess , accessError = Bool - val clearLrsc = ifGen(p.withLrSc) {Bool} + val mmuException, unalignedAccess, accessError = Bool // val exceptionBus = if(p.catchSomething) Flow(ExceptionCause()) else null override def asMaster(): Unit = { out(isValid,isStuck,isUser, address) in(haltIt, data, mmuException, unalignedAccess, accessError, isWrite) - outWithNull(clearLrsc) } } @@ -169,11 +170,13 @@ case class DataCacheMemCmd(p : DataCacheConfig) extends Bundle{ val data = Bits(p.memDataWidth bits) val mask = Bits(p.memDataWidth/8 bits) val length = UInt(log2Up(p.burstLength) bits) + val exclusive = p.withSmp generate Bool() val last = Bool } case class DataCacheMemRsp(p : DataCacheConfig) extends Bundle{ val data = Bits(p.memDataWidth bit) val error = Bool + val exclusive = p.withSmp generate Bool() } case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave{ @@ -516,19 +519,17 @@ class DataCache(p : DataCacheConfig) extends Component{ } - val lrsc = withLrSc generate new Area{ + val lrSc = withLrSc generate new Area{ val reserved = RegInit(False) - when(io.cpu.writeBack.isValid && !io.cpu.writeBack.isStuck && !io.cpu.redo && request.isLrsc && !request.wr){ - reserved := True - } - when(io.cpu.writeBack.clearLrsc){ - reserved := False + when(io.cpu.writeBack.isValid && !io.cpu.writeBack.isStuck && request.isLrsc + && !io.cpu.redo && !io.cpu.writeBack.mmuException && !io.cpu.writeBack.unalignedAccess && !io.cpu.writeBack.accessError){ + reserved := !request.wr } } val requestDataBypass = CombInit(request.data) val isAmo = if(withAmo) request.isAmo else False - val amo = withAmo generate new Area{ + val internalAmo = 
withInternalAmo generate new Area{ def rf = request.data def mem = dataMux @@ -550,6 +551,7 @@ class DataCache(p : DataCacheConfig) extends Component{ } + val memCmdSent = RegInit(False) setWhen (io.mem.cmd.ready) clearWhen (!io.cpu.writeBack.isStuck) io.cpu.redo := False io.cpu.writeBack.accessError := False @@ -564,9 +566,10 @@ class DataCache(p : DataCacheConfig) extends Component{ io.mem.cmd.wr := request.wr io.mem.cmd.mask := mask io.mem.cmd.data := requestDataBypass + if(withExternalLrSc) io.mem.cmd.exclusive := request.isLrsc || request.isAmo when(io.cpu.writeBack.isValid) { - when(mmuRsp.isIoAccess) { + when(mmuRsp.isIoAccess || (if(withExternalLrSc) request.isLrsc else False)) { io.cpu.writeBack.haltIt.clearWhen(request.wr ? io.mem.cmd.ready | io.mem.rsp.valid) io.mem.cmd.valid := !memCmdSent @@ -574,7 +577,7 @@ class DataCache(p : DataCacheConfig) extends Component{ io.mem.cmd.length := 0 io.mem.cmd.last := True - if(withLrSc) when(request.isLrsc && !lrsc.reserved){ + if(withInternalLrSc) when(request.isLrsc && !lrSc.reserved){ io.mem.cmd.valid := False io.cpu.writeBack.haltIt := False } @@ -595,7 +598,7 @@ class DataCache(p : DataCacheConfig) extends Component{ io.cpu.writeBack.haltIt clearWhen(!request.wr || io.mem.cmd.ready) if(withAmo) when(isAmo){ - when(!amo.resultRegValid) { + when(!internalAmo.resultRegValid) { io.mem.cmd.valid := False dataWriteCmd.valid := False io.cpu.writeBack.haltIt := True @@ -608,7 +611,7 @@ class DataCache(p : DataCacheConfig) extends Component{ if(withAmo) io.mem.cmd.valid := False } - if(withLrSc) when(request.isLrsc && !lrsc.reserved){ + if(withInternalLrSc) when(request.isLrsc && !lrSc.reserved){ io.mem.cmd.valid := False dataWriteCmd.valid := False io.cpu.writeBack.haltIt := False @@ -648,12 +651,12 @@ class DataCache(p : DataCacheConfig) extends Component{ if(withLrSc){ when(request.isLrsc && request.wr){ - io.cpu.writeBack.data := (!lrsc.reserved).asBits.resized + io.cpu.writeBack.data := (!lrSc.reserved).asBits.resized } } if(withAmo){ when(request.isAmo){ - requestDataBypass := amo.resultReg + requestDataBypass := internalAmo.resultReg } } } diff --git a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala index fd45dd8..73b0fb0 100644 --- a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala +++ b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala @@ -254,7 +254,6 @@ class DBusCachedPlugin(val config : DataCacheConfig, cache.io.cpu.writeBack.isStuck := arbitration.isStuck cache.io.cpu.writeBack.isUser := (if(privilegeService != null) privilegeService.isUser() else False) cache.io.cpu.writeBack.address := U(input(REGFILE_WRITE_DATA)) - if(withLrSc) cache.io.cpu.writeBack.clearLrsc := service(classOf[IContextSwitching]).isContextSwitching redoBranch.valid := False redoBranch.payload := input(PC) diff --git a/src/main/scala/vexriscv/plugin/DBusSimplePlugin.scala b/src/main/scala/vexriscv/plugin/DBusSimplePlugin.scala index e08b640..0b02da2 100644 --- a/src/main/scala/vexriscv/plugin/DBusSimplePlugin.scala +++ b/src/main/scala/vexriscv/plugin/DBusSimplePlugin.scala @@ -469,13 +469,9 @@ class DBusSimplePlugin(catchAddressMisaligned : Boolean = false, val atomic = withLrSc generate new Area{ val reserved = RegInit(False) insert(ATOMIC_HIT) := reserved - when(arbitration.isFiring && input(MEMORY_ENABLE) && input(MEMORY_ATOMIC) && !input(MEMORY_STORE)){ - reserved := True + when(arbitration.isFiring && input(MEMORY_ENABLE) && input(MEMORY_ATOMIC) && !input(MMU_FAULT) && !skipCmd){ + 
reserved := !input(MEMORY_STORE) } - when(service(classOf[IContextSwitching]).isContextSwitching){ - reserved := False - } - when(input(MEMORY_STORE) && input(MEMORY_ATOMIC) && !input(ATOMIC_HIT)){ skipCmd := True } diff --git a/src/test/cpp/raw/lrsc/build/lrsc.asm b/src/test/cpp/raw/lrsc/build/lrsc.asm index 95b4751..4ff24b4 100644 --- a/src/test/cpp/raw/lrsc/build/lrsc.asm +++ b/src/test/cpp/raw/lrsc/build/lrsc.asm @@ -38,9 +38,9 @@ Disassembly of section .crt_section: 80000060: 00d52023 sw a3,0(a0) # 10000000 80000064: 18b5262f sc.w a2,a1,(a0) 80000068: 00100713 li a4,1 -8000006c: 26e61e63 bne a2,a4,800002e8 +8000006c: 14e61a63 bne a2,a4,800001c0 80000070: 00052703 lw a4,0(a0) -80000074: 26e69a63 bne a3,a4,800002e8 +80000074: 14e69663 bne a3,a4,800001c0 80000078: 00200e13 li t3,2 8000007c: 10000537 lui a0,0x10000 80000080: 00450513 addi a0,a0,4 # 10000004 @@ -50,9 +50,9 @@ Disassembly of section .crt_section: 80000090: 00d52023 sw a3,0(a0) 80000094: 18b5262f sc.w a2,a1,(a0) 80000098: 00100713 li a4,1 -8000009c: 24e61663 bne a2,a4,800002e8 +8000009c: 12e61263 bne a2,a4,800001c0 800000a0: 00052703 lw a4,0(a0) -800000a4: 24e69263 bne a3,a4,800002e8 +800000a4: 10e69e63 bne a3,a4,800001c0 800000a8: 00300e13 li t3,3 800000ac: 10000537 lui a0,0x10000 800000b0: 00450513 addi a0,a0,4 # 10000004 @@ -61,9 +61,9 @@ Disassembly of section .crt_section: 800000bc: 06900693 li a3,105 800000c0: 18b5262f sc.w a2,a1,(a0) 800000c4: 00100713 li a4,1 -800000c8: 22e61063 bne a2,a4,800002e8 +800000c8: 0ee61c63 bne a2,a4,800001c0 800000cc: 00052703 lw a4,0(a0) -800000d0: 20e69c63 bne a3,a4,800002e8 +800000d0: 0ee69863 bne a3,a4,800001c0 800000d4: 00400e13 li t3,4 800000d8: 10000537 lui a0,0x10000 800000dc: 00850513 addi a0,a0,8 # 10000008 @@ -73,10 +73,10 @@ Disassembly of section .crt_section: 800000ec: 00d52023 sw a3,0(a0) 800000f0: 100527af lr.w a5,(a0) 800000f4: 18b5262f sc.w a2,a1,(a0) -800000f8: 1ed79863 bne a5,a3,800002e8 -800000fc: 1e061663 bnez a2,800002e8 +800000f8: 0cd79463 bne a5,a3,800001c0 +800000fc: 0c061263 bnez a2,800001c0 80000100: 00052703 lw a4,0(a0) -80000104: 1ee59263 bne a1,a4,800002e8 +80000104: 0ae59e63 bne a1,a4,800001c0 80000108: 00500e13 li t3,5 8000010c: 10000537 lui a0,0x10000 80000110: 00850513 addi a0,a0,8 # 10000008 @@ -85,133 +85,59 @@ Disassembly of section .crt_section: 8000011c: 06f00693 li a3,111 80000120: 00d52023 sw a3,0(a0) 80000124: 18b5262f sc.w a2,a1,(a0) -80000128: 1c061063 bnez a2,800002e8 +80000128: 08060c63 beqz a2,800001c0 8000012c: 00052703 lw a4,0(a0) -80000130: 1ae59c63 bne a1,a4,800002e8 -80000134: 00600e13 li t3,6 +80000130: 08e69863 bne a3,a4,800001c0 +80000134: 00700e13 li t3,7 80000138: 10000537 lui a0,0x10000 -8000013c: 00c50513 addi a0,a0,12 # 1000000c -80000140: 07000593 li a1,112 -80000144: 07100613 li a2,113 -80000148: 07200693 li a3,114 -8000014c: 10000437 lui s0,0x10000 -80000150: 01040413 addi s0,s0,16 # 10000010 -80000154: 07300493 li s1,115 -80000158: 07400913 li s2,116 -8000015c: 07500993 li s3,117 -80000160: 00d52023 sw a3,0(a0) -80000164: 01342023 sw s3,0(s0) -80000168: 100527af lr.w a5,(a0) -8000016c: 10042aaf lr.w s5,(s0) -80000170: 18b5262f sc.w a2,a1,(a0) -80000174: 1894292f sc.w s2,s1,(s0) -80000178: 16d79863 bne a5,a3,800002e8 -8000017c: 16061663 bnez a2,800002e8 -80000180: 00052703 lw a4,0(a0) -80000184: 16e59263 bne a1,a4,800002e8 -80000188: 173a9063 bne s5,s3,800002e8 -8000018c: 14091e63 bnez s2,800002e8 -80000190: 00042a03 lw s4,0(s0) -80000194: 15449a63 bne s1,s4,800002e8 -80000198: 00700e13 li t3,7 -8000019c: 10000537 lui 
a0,0x10000 -800001a0: 01450513 addi a0,a0,20 # 10000014 -800001a4: 07800593 li a1,120 -800001a8: 07900613 li a2,121 -800001ac: 07a00693 li a3,122 -800001b0: 01000e93 li t4,16 +8000013c: 01450513 addi a0,a0,20 # 10000014 +80000140: 07800593 li a1,120 +80000144: 07900613 li a2,121 +80000148: 07a00693 li a3,122 +8000014c: 01000e93 li t4,16 -800001b4 : -800001b4: 00d52023 sw a3,0(a0) -800001b8: 100527af lr.w a5,(a0) -800001bc: 18b5262f sc.w a2,a1,(a0) -800001c0: 12d79463 bne a5,a3,800002e8 -800001c4: 12061263 bnez a2,800002e8 -800001c8: 00052703 lw a4,0(a0) -800001cc: 10e59e63 bne a1,a4,800002e8 -800001d0: fffe8e93 addi t4,t4,-1 -800001d4: 00450513 addi a0,a0,4 -800001d8: 00358593 addi a1,a1,3 -800001dc: 00360613 addi a2,a2,3 -800001e0: 00368693 addi a3,a3,3 -800001e4: fc0e98e3 bnez t4,800001b4 -800001e8: 00900e13 li t3,9 -800001ec: 10000537 lui a0,0x10000 -800001f0: 10050513 addi a0,a0,256 # 10000100 -800001f4: 07b00593 li a1,123 -800001f8: 07c00613 li a2,124 -800001fc: 07d00693 li a3,125 -80000200: 00d52023 sw a3,0(a0) -80000204: 100527af lr.w a5,(a0) -80000208: 00000073 ecall -8000020c: 18b5262f sc.w a2,a1,(a0) -80000210: 00100713 li a4,1 -80000214: 0ce61a63 bne a2,a4,800002e8 -80000218: 00052703 lw a4,0(a0) -8000021c: 0ce69663 bne a3,a4,800002e8 -80000220: 00b00e13 li t3,11 -80000224: 10000537 lui a0,0x10000 -80000228: 30050513 addi a0,a0,768 # 10000300 -8000022c: 08200593 li a1,130 -80000230: 08300613 li a2,131 -80000234: 08400693 li a3,132 -80000238: 00d52023 sw a3,0(a0) -8000023c: 00001eb7 lui t4,0x1 -80000240: 800e8e93 addi t4,t4,-2048 # 800 -80000244: 304e9073 csrw mie,t4 -80000248: 00800e93 li t4,8 -8000024c: 100527af lr.w a5,(a0) -80000250: 300e9073 csrw mstatus,t4 -80000254: 00000013 nop -80000258: 00000013 nop -8000025c: 00000013 nop -80000260: 00000013 nop -80000264: 00000013 nop -80000268: 00000013 nop -8000026c: 18b5262f sc.w a2,a1,(a0) -80000270: 00100713 li a4,1 -80000274: 06e61a63 bne a2,a4,800002e8 -80000278: 00052703 lw a4,0(a0) -8000027c: 06e69663 bne a3,a4,800002e8 -80000280: 00c00e13 li t3,12 -80000284: 10000537 lui a0,0x10000 -80000288: 40050513 addi a0,a0,1024 # 10000400 -8000028c: 08c00593 li a1,140 -80000290: 08d00613 li a2,141 -80000294: 08e00693 li a3,142 -80000298: 00d52023 sw a3,0(a0) -8000029c: 00001eb7 lui t4,0x1 -800002a0: 800e8e93 addi t4,t4,-2048 # 800 -800002a4: 304e9073 csrw mie,t4 -800002a8: 00002eb7 lui t4,0x2 -800002ac: 808e8e93 addi t4,t4,-2040 # 1808 -800002b0: 100527af lr.w a5,(a0) -800002b4: 300e9073 csrw mstatus,t4 -800002b8: 00000013 nop -800002bc: 00000013 nop -800002c0: 00000013 nop -800002c4: 00000013 nop -800002c8: 00000013 nop -800002cc: 00000013 nop -800002d0: 18b5262f sc.w a2,a1,(a0) -800002d4: 00100713 li a4,1 -800002d8: 00e61863 bne a2,a4,800002e8 -800002dc: 00052703 lw a4,0(a0) -800002e0: 00e69463 bne a3,a4,800002e8 -800002e4: 0100006f j 800002f4 +80000150 : +80000150: 00d52023 sw a3,0(a0) +80000154: 100527af lr.w a5,(a0) +80000158: 18b5262f sc.w a2,a1,(a0) +8000015c: 06d79263 bne a5,a3,800001c0 +80000160: 06061063 bnez a2,800001c0 +80000164: 00052703 lw a4,0(a0) +80000168: 04e59c63 bne a1,a4,800001c0 +8000016c: fffe8e93 addi t4,t4,-1 +80000170: 00450513 addi a0,a0,4 +80000174: 00358593 addi a1,a1,3 +80000178: 00360613 addi a2,a2,3 +8000017c: 00368693 addi a3,a3,3 +80000180: fc0e98e3 bnez t4,80000150 +80000184: 00900e13 li t3,9 +80000188: 10000537 lui a0,0x10000 +8000018c: 10050513 addi a0,a0,256 # 10000100 +80000190: 07b00593 li a1,123 +80000194: 07c00613 li a2,124 +80000198: 07d00693 li a3,125 +8000019c: 00d52023 sw a3,0(a0) 
+800001a0: 100527af lr.w a5,(a0) +800001a4: 00000073 ecall +800001a8: 18b527af sc.w a5,a1,(a0) +800001ac: 00000713 li a4,0 +800001b0: 00e79863 bne a5,a4,800001c0 +800001b4: 00052703 lw a4,0(a0) +800001b8: 00e59463 bne a1,a4,800001c0 +800001bc: 0100006f j 800001cc -800002e8 : -800002e8: f0100137 lui sp,0xf0100 -800002ec: f2410113 addi sp,sp,-220 # f00fff24 -800002f0: 01c12023 sw t3,0(sp) +800001c0 : +800001c0: f0100137 lui sp,0xf0100 +800001c4: f2410113 addi sp,sp,-220 # f00fff24 +800001c8: 01c12023 sw t3,0(sp) -800002f4 : -800002f4: f0100137 lui sp,0xf0100 -800002f8: f2010113 addi sp,sp,-224 # f00fff20 -800002fc: 00012023 sw zero,0(sp) -80000300: 00000013 nop -80000304: 00000013 nop -80000308: 00000013 nop -8000030c: 00000013 nop -80000310: 00000013 nop -80000314: 00000013 nop +800001cc : +800001cc: f0100137 lui sp,0xf0100 +800001d0: f2010113 addi sp,sp,-224 # f00fff20 +800001d4: 00012023 sw zero,0(sp) +800001d8: 00000013 nop +800001dc: 00000013 nop +800001e0: 00000013 nop +800001e4: 00000013 nop +800001e8: 00000013 nop +800001ec: 00000013 nop diff --git a/src/test/cpp/raw/lrsc/build/lrsc.hex b/src/test/cpp/raw/lrsc/build/lrsc.hex index 7b96205..1c1cd4c 100644 --- a/src/test/cpp/raw/lrsc/build/lrsc.hex +++ b/src/test/cpp/raw/lrsc/build/lrsc.hex @@ -5,49 +5,30 @@ :10003000938E0E8073900E3073002030F32E1034A8 :10004000938E4E0073901E3473002030130E1000F8 :100050003705001093054006130650069306600608 -:100060002320D5002F26B51813071000631EE6269F -:1000700003270500639AE626130E200037050010BB +:100060002320D5002F26B51813071000631AE614B5 +:10007000032705006396E614130E200037050010D1 :100080001305450093057006130680069306900637 -:100090002320D5002F26B518130710006316E62479 -:1000A000032705006392E624130E30003705001085 +:100090002320D5002F26B518130710006312E6128F +:1000A00003270500639EE610130E3000370500108D :1000B0001305450093057006130680069306900607 -:1000C0002F26B518130710006310E622032705003A -:1000D000639CE620130E40003705001013058500D1 +:1000C0002F26B51813071000631CE60E0327050042 +:1000D0006398E60E130E40003705001013058500E7 :1000E0009305A0061306B0069306C0062320D5008C -:1000F000AF2705102F26B5186398D71E6316061E66 -:10010000032705006392E51E130E5000370500100B +:1000F000AF2705102F26B5186394D70C6312060C92 +:1001000003270500639EE50A130E50003705001013 :10011000130585009305D0061306E0069306F00646 -:100120002320D5002F26B5186310061C03270500D1 -:10013000639CE51A130E6000370500101305C50017 -:1001400093050007130610079306200737040010D5 -:10015000130404019304300713094007930950075F -:100160002320D50023203401AF270510AF2A041027 -:100170002F26B5182F2994186398D71663160616DC -:10018000032705006392E51663903A17631E09146E -:10019000032A0400639A4415130E700037050010FB -:1001A0001305450193058007130690079306A007E2 -:1001B000930E00012320D500AF2705102F26B51878 -:1001C0006394D7126312061203270500639EE5109D -:1001D000938EFEFF13054500938535001306360008 -:1001E00093863600E3980EFC130E9000370500103E -:1001F000130505109305B0071306C0079306D00733 -:100200002320D500AF270510730000002F26B51856 -:1002100013071000631AE60C032705006396E60C2B -:10022000130EB000370500101305053093052008A4 -:1002300013063008930640082320D500B71E00009F -:10024000938E0E8073904E30930E8000AF27051072 -:1002500073900E3013000000130000001300000024 -:100260001300000013000000130000002F26B51833 -:1002700013071000631AE606032705006396E606D7 -:10028000130EC00037050010130505409305C00884 -:100290001306D0089306E0082320D500B71E0000FF -:1002A000938E0E8073904E30B72E0000938E8E800A -:1002B000AF27051073900E301300000013000000EC -:1002C00013000000130000001300000013000000E2 
-:1002D0002F26B518130710006318E6000327050042 -:1002E0006394E6006F000001370110F0130141F242 -:1002F0002320C101370110F0130101F22320010076 -:1003000013000000130000001300000013000000A1 -:080310001300000013000000BF +:100120002320D5002F26B518630C060803270500E9 +:100130006398E608130E700037050010130545019B +:1001400093058007130690079306A007930E0001FE +:100150002320D500AF2705102F26B5186392D706A8 +:100160006310060603270500639CE504938EFEFFDB +:100170001305450093853500130636009386360037 +:10018000E3980EFC130E90003705001013050510C0 +:100190009305B0071306C0079306D0072320D500A8 +:1001A000AF27051073000000AF27B5181307000034 +:1001B0006398E700032705006394E5006F000001E2 +:1001C000370110F0130141F22320C101370110F073 +:1001D000130101F2232001001300000013000000AE +:1001E00013000000130000001300000013000000C3 :040000058000004C2B :00000001FF diff --git a/src/test/cpp/raw/lrsc/src/crt.S b/src/test/cpp/raw/lrsc/src/crt.S index 7fef5e3..0cddcd3 100644 --- a/src/test/cpp/raw/lrsc/src/crt.S +++ b/src/test/cpp/raw/lrsc/src/crt.S @@ -81,7 +81,7 @@ _start: bne a1, a4, fail -//Test 5 redo SC on reserved area should pass and should be written write memory +//Test 5 redo SC on reserved area should fail li x28, 5 li a0, 0x10000008 li a1, 109 @@ -89,36 +89,10 @@ _start: li a3, 111 sw a3, 0(a0) sc.w a2, a1, (a0) - bne a2, x0, fail + beq a2, x0, fail lw a4, 0(a0) - bne a1, a4, fail + bne a3, a4, fail -//Test 6 Allow two entries at the same time - li x28, 6 - li a0, 0x1000000C - li a1, 112 - li a2, 113 - li a3, 114 - li s0, 0x10000010 - li s1, 115 - li s2, 116 - li s3, 117 - - sw a3, 0(a0) - sw s3, 0(s0) - lr.w a5, (a0) - lr.w s5, (s0) - sc.w a2, a1, (a0) - sc.w s2, s1, (s0) - bne a5, a3, fail - bne a2, x0, fail - lw a4, 0(a0) - bne a1, a4, fail - - bne s5, s3, fail - bne s2, x0, fail - lw s4, 0(s0) - bne s1, s4, fail //Test 7 do a lot of allocation to clear the entries li x28, 7 @@ -157,7 +131,7 @@ test7: bne a5, a4, fail*/ -//Test 9 SC should fail after a context switching +//Test 9 SC should pass after a context switching li x28, 9 li a0, 0x10000100 li a1, 123 @@ -166,11 +140,11 @@ test7: sw a3, 0(a0) lr.w a5, (a0) scall - sc.w a2, a1, (a0) - li a4, 1 - bne a2, a4, fail + sc.w a5, a1, (a0) + li a4, 0 + bne a5, a4, fail lw a4, 0(a0) - bne a3, a4, fail + bne a1, a4, fail @@ -192,58 +166,6 @@ test7: bne a7, a4, fail*/ - -//Test 11 SC should fail after a external interrupt context switching - li x28, 11 - li a0, 0x10000300 - li a1, 130 - li a2, 131 - li a3, 132 - sw a3, 0(a0) - li x29, 0x800 //800 external interrupts - csrw mie,x29 - li x29, 0x008 //008 enable interrupts - lr.w a5, (a0) - csrw mstatus,x29 //Enable external interrupt (will jump instantly due to testbench setup) - nop - nop - nop - nop - nop - nop - sc.w a2, a1, (a0) - li a4, 1 - bne a2, a4, fail - lw a4, 0(a0) - bne a3, a4, fail - - -//Test 12 SC should fail after a external interrupt context switching (callback on lr) - li x28, 12 - li a0, 0x10000400 - li a1, 140 - li a2, 141 - li a3, 142 - sw a3, 0(a0) - li x29, 0x800 //800 external interrupts - csrw mie,x29 - li x29, 0x1808 //008 enable interrupts - lr.w a5, (a0) - csrw mstatus,x29 //Enable external interrupt (will jump instantly due to testbench setup) - nop - nop - nop - nop - nop - nop - sc.w a2, a1, (a0) - li a4, 1 - bne a2, a4, fail - lw a4, 0(a0) - bne a3, a4, fail - - - j pass diff --git a/src/test/cpp/regression/main.cpp b/src/test/cpp/regression/main.cpp index d983e8e..9394144 100644 --- a/src/test/cpp/regression/main.cpp +++ b/src/test/cpp/regression/main.cpp @@ -464,7 +464,6 @@ public: cout << hex << " 
a7=0x" << regs[17] << " a0=0x" << regs[10] << " a1=0x" << regs[11] << " a2=0x" << regs[12] << dec << endl; } #endif - lrscReserved = false; //Check leguality of the interrupt if(interrupt) { bool hit = false; @@ -835,7 +834,6 @@ public: status.mpie = 1; status.mpp = 0; pcWrite(mepc); - lrscReserved = false; }break; case 0x10200073:{ //SRET if(privilege < 1){ ilegalInstruction(); return;} @@ -844,7 +842,6 @@ public: status.spie = 1; status.spp = 0; pcWrite(sepc); - lrscReserved = false; }break; case 0x00000073:{ //ECALL trap(0, 8+privilege, 0x00000073); //To follow the VexRiscv area saving implementation @@ -909,6 +906,7 @@ public: if(hit){ dWrite(pAddr, 4, i32_rs2); } + lrscReserved = false; rfWrite(rd32, !hit); pcWrite(pc + 4); } diff --git a/src/test/cpp/regression/makefile b/src/test/cpp/regression/makefile index 61fe9d1..2278d06 100644 --- a/src/test/cpp/regression/makefile +++ b/src/test/cpp/regression/makefile @@ -278,5 +278,4 @@ compile: verilate clean: rm -rf obj_dir - rm -f VexRiscv.v*.bin diff --git a/src/test/scala/vexriscv/TestIndividualFeatures.scala b/src/test/scala/vexriscv/TestIndividualFeatures.scala index bbf84a4..9d147bc 100644 --- a/src/test/scala/vexriscv/TestIndividualFeatures.scala +++ b/src/test/scala/vexriscv/TestIndividualFeatures.scala @@ -436,7 +436,7 @@ class DBusDimension extends VexRiscvDimension("DBus") { cacheSize = 512 << r.nextInt(5) wayCount = 1 << r.nextInt(3) }while(cacheSize/wayCount < 512 || (catchAll && cacheSize/wayCount > 4096)) - new VexRiscvPosition("Cached" + "S" + cacheSize + "W" + wayCount + "BPL" + bytePerLine + (if(dBusCmdMasterPipe) "Cmp " else "") + (if(dBusCmdSlavePipe) "Csp " else "") + (if(dBusRspSlavePipe) "Rsp " else "") + (if(relaxedMemoryTranslationRegister) "Rmtr " else "") + (if(earlyWaysHits) "Ewh " else "")) { + new VexRiscvPosition("Cached" + "S" + cacheSize + "W" + wayCount + "BPL" + bytePerLine + (if(dBusCmdMasterPipe) "Cmp " else "") + (if(dBusCmdSlavePipe) "Csp " else "") + (if(dBusRspSlavePipe) "Rsp " else "") + (if(relaxedMemoryTranslationRegister) "Rmtr " else "") + (if(earlyWaysHits) "Ewh " else "") + (if(withAmo) "Amo " else "")) { override def testParam = "DBUS=CACHED " + (if(withLrSc) "LRSC=yes " else "") + (if(withAmo) "AMO=yes " else "") override def applyOn(config: VexRiscvConfig): Unit = { From ff074459ade53eaa99bb35b6451d5d6b9c9801e3 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Sat, 4 Apr 2020 22:54:35 +0200 Subject: [PATCH 02/91] Fix LrSc for configs without mmu --- src/main/scala/vexriscv/plugin/DBusSimplePlugin.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/vexriscv/plugin/DBusSimplePlugin.scala b/src/main/scala/vexriscv/plugin/DBusSimplePlugin.scala index 0b02da2..4130691 100644 --- a/src/main/scala/vexriscv/plugin/DBusSimplePlugin.scala +++ b/src/main/scala/vexriscv/plugin/DBusSimplePlugin.scala @@ -469,7 +469,7 @@ class DBusSimplePlugin(catchAddressMisaligned : Boolean = false, val atomic = withLrSc generate new Area{ val reserved = RegInit(False) insert(ATOMIC_HIT) := reserved - when(arbitration.isFiring && input(MEMORY_ENABLE) && input(MEMORY_ATOMIC) && !input(MMU_FAULT) && !skipCmd){ + when(arbitration.isFiring && input(MEMORY_ENABLE) && input(MEMORY_ATOMIC) && (if(mmuBus != null) !input(MMU_FAULT) else True) && !skipCmd){ reserved := !input(MEMORY_STORE) } when(input(MEMORY_STORE) && input(MEMORY_ATOMIC) && !input(ATOMIC_HIT)){ From f2ef8e95ab18b65149bec06c4a6bafe269d413d9 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Sun, 5 Apr 2020 11:38:57 +0200 Subject: 
[PATCH 03/91] Implement external LrSc --- src/main/scala/vexriscv/ip/DataCache.scala | 48 +++++++++++++------ .../vexriscv/plugin/DBusCachedPlugin.scala | 2 +- 2 files changed, 34 insertions(+), 16 deletions(-) diff --git a/src/main/scala/vexriscv/ip/DataCache.scala b/src/main/scala/vexriscv/ip/DataCache.scala index 0dfe74e..af6f15c 100644 --- a/src/main/scala/vexriscv/ip/DataCache.scala +++ b/src/main/scala/vexriscv/ip/DataCache.scala @@ -26,6 +26,7 @@ case class DataCacheConfig(cacheSize : Int, withLrSc : Boolean = false, withAmo : Boolean = false, withSmp : Boolean = false, + pendingMax : Int = 64, mergeExecuteMemory : Boolean = false){ assert(!(mergeExecuteMemory && (earlyDataMux || earlyWaysHits))) assert(!(earlyDataMux && !earlyWaysHits)) @@ -91,12 +92,12 @@ object DataCacheCpuExecute{ case class DataCacheCpuExecute(p : DataCacheConfig) extends Bundle with IMasterSlave{ val isValid = Bool val address = UInt(p.addressWidth bit) - // val haltIt = Bool + val haltIt = Bool val args = DataCacheCpuExecuteArgs(p) override def asMaster(): Unit = { out(isValid, args, address) - // in(haltIt) + in(haltIt) } } @@ -441,6 +442,22 @@ class DataCache(p : DataCacheConfig) extends Component{ ret } + + io.cpu.execute.haltIt := False + + val rspSync = True + val pending = withSmp generate new Area{ + val counter = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0) + counter := counter + U(io.mem.cmd.fire && io.mem.cmd.last) - U(io.mem.rsp.valid) + + val full = RegNext(counter.msb) + val last = counter === 1 + + io.cpu.execute.haltIt setWhen(full) + rspSync clearWhen(!last) + } + + val stage0 = new Area{ val mask = io.cpu.execute.size.mux ( U(0) -> B"0001", @@ -566,11 +583,14 @@ class DataCache(p : DataCacheConfig) extends Component{ io.mem.cmd.wr := request.wr io.mem.cmd.mask := mask io.mem.cmd.data := requestDataBypass - if(withExternalLrSc) io.mem.cmd.exclusive := request.isLrsc || request.isAmo + if(withExternalLrSc) io.mem.cmd.exclusive := request.isLrsc || (if(withAmo) request.isAmo else False) when(io.cpu.writeBack.isValid) { when(mmuRsp.isIoAccess || (if(withExternalLrSc) request.isLrsc else False)) { - io.cpu.writeBack.haltIt.clearWhen(request.wr ? io.mem.cmd.ready | io.mem.rsp.valid) + val waitResponse = !request.wr + if(withExternalLrSc) waitResponse setWhen(request.isLrsc) + + io.cpu.writeBack.haltIt.clearWhen(waitResponse ? 
(io.mem.rsp.valid && rspSync) | io.mem.cmd.ready) io.mem.cmd.valid := !memCmdSent io.mem.cmd.address := mmuRsp.physicalAddress(tagRange.high downto wordRange.low) @@ U(0, wordRange.low bit) @@ -636,6 +656,12 @@ class DataCache(p : DataCacheConfig) extends Component{ io.cpu.writeBack.data := dataMux if(catchAccessError) io.cpu.writeBack.accessError := (waysHits & B(tagsReadRsp.map(_.error))) =/= 0 } + if(withLrSc) when(request.isLrsc && request.wr){ + io.cpu.writeBack.data := B(!lrSc.reserved || !io.mem.rsp.exclusive).resized + } + if(withAmo) when(request.isAmo){ + requestDataBypass := internalAmo.resultReg + } //remove side effects on exceptions when(mmuRsp.refilling || io.cpu.writeBack.accessError || io.cpu.writeBack.mmuException || io.cpu.writeBack.unalignedAccess){ @@ -649,16 +675,8 @@ class DataCache(p : DataCacheConfig) extends Component{ assert(!(io.cpu.writeBack.isValid && !io.cpu.writeBack.haltIt && io.cpu.writeBack.isStuck), "writeBack stuck by another plugin is not allowed") - if(withLrSc){ - when(request.isLrsc && request.wr){ - io.cpu.writeBack.data := (!lrSc.reserved).asBits.resized - } - } - if(withAmo){ - when(request.isAmo){ - requestDataBypass := internalAmo.resultReg - } - } + + } val loader = new Area{ @@ -669,7 +687,7 @@ class DataCache(p : DataCacheConfig) extends Component{ val waysAllocator = Reg(Bits(wayCount bits)) init(1) val error = RegInit(False) - when(valid && io.mem.rsp.valid){ + when(valid && io.mem.rsp.valid && rspSync){ dataWriteCmd.valid := True dataWriteCmd.address := baseAddress(lineRange) @@ counter dataWriteCmd.data := io.mem.rsp.data diff --git a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala index 73b0fb0..958d1a8 100644 --- a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala +++ b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala @@ -204,7 +204,7 @@ class DBusCachedPlugin(val config : DataCacheConfig, cache.io.cpu.flush.valid := arbitration.isValid && input(MEMORY_MANAGMENT) - arbitration.haltItself setWhen(cache.io.cpu.flush.isStall) + arbitration.haltItself setWhen(cache.io.cpu.flush.isStall || cache.io.cpu.execute.haltIt) if(withLrSc) { cache.io.cpu.execute.args.isLrsc := False From 2eec18de655af449bcec2642e6a855bc8554b43d Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Sun, 5 Apr 2020 16:28:46 +0200 Subject: [PATCH 04/91] LrSc SMP, linux crash in userspace --- src/main/scala/vexriscv/TestsWorkspace.scala | 94 ++++--- src/main/scala/vexriscv/ip/DataCache.scala | 47 ++-- src/test/cpp/raw/lrsc/build/lrsc.asm | 249 +++++++++++-------- src/test/cpp/raw/lrsc/build/lrsc.hex | 64 ++--- src/test/cpp/raw/lrsc/src/crt.S | 38 ++- src/test/cpp/regression/main.cpp | 89 ++++--- src/test/cpp/regression/makefile | 5 + 7 files changed, 362 insertions(+), 224 deletions(-) diff --git a/src/main/scala/vexriscv/TestsWorkspace.scala b/src/main/scala/vexriscv/TestsWorkspace.scala index 8db4316..9c223a7 100644 --- a/src/main/scala/vexriscv/TestsWorkspace.scala +++ b/src/main/scala/vexriscv/TestsWorkspace.scala @@ -26,29 +26,40 @@ import vexriscv.ip._ import spinal.lib.bus.avalon.AvalonMM import spinal.lib.eda.altera.{InterruptReceiverTag, ResetEmitterTag} + +//make clean all SEED=42 MMU=no STOP_ON_ERROR=yes SMP=yes SUPERVISOR=yes REDO=10 DHRYSTONE=no LRSC=yes LINUX_REGRESSION=yes TRACE=yes TRACE_START=1000000000000l FLOW_INFO=no object TestsWorkspace { def main(args: Array[String]) { def configFull = { val config = VexRiscvConfig( plugins = List( - // new IBusSimplePlugin( - // resetVector = 
0x80000000l, - // cmdForkOnSecondStage = false, - // cmdForkPersistence = false, - // prediction = NONE, - // historyRamSizeLog2 = 10, - // catchAccessFault = false, - // compressedGen = false, - // busLatencyMin = 1, - // injectorStage = true - // ), + new MmuPlugin( + ioRange = x => x(31 downto 28) === 0xF + ), + //Uncomment the whole IBusSimplePlugin and comment IBusCachedPlugin if you want uncached iBus config + // new IBusSimplePlugin( + // resetVector = 0x80000000l, + // cmdForkOnSecondStage = false, + // cmdForkPersistence = false, + // prediction = DYNAMIC_TARGET, + // historyRamSizeLog2 = 10, + // catchAccessFault = true, + // compressedGen = true, + // busLatencyMin = 1, + // injectorStage = true, + // memoryTranslatorPortConfig = withMmu generate MmuPortConfig( + // portTlbSize = 4 + // ) + // ), + + //Uncomment the whole IBusCachedPlugin and comment IBusSimplePlugin if you want cached iBus config new IBusCachedPlugin( resetVector = 0x80000000l, compressedGen = false, - prediction = NONE, - injectorStage = true, + prediction = STATIC, + injectorStage = false, config = InstructionCacheConfig( - cacheSize = 4096, + cacheSize = 4096*1, bytePerLine = 32, wayCount = 1, addressWidth = 32, @@ -59,20 +70,28 @@ object TestsWorkspace { asyncTagMemory = false, twoCycleRam = false, twoCycleCache = true + // ) ), - memoryTranslatorPortConfig = MemoryTranslatorPortConfig( + memoryTranslatorPortConfig = MmuPortConfig( portTlbSize = 4 ) ), -// ).newTightlyCoupledPort(TightlyCoupledPortParameter("iBusTc", a => a(30 downto 28) === 0x0 && a(5))), - // new DBusSimplePlugin( - // catchAddressMisaligned = true, - // catchAccessFault = false, - // earlyInjection = false - // ), + // ).newTightlyCoupledPort(TightlyCoupledPortParameter("iBusTc", a => a(30 downto 28) === 0x0 && a(5))), + // new DBusSimplePlugin( + // catchAddressMisaligned = true, + // catchAccessFault = true, + // earlyInjection = false, + // withLrSc = true, + // memoryTranslatorPortConfig = withMmu generate MmuPortConfig( + // portTlbSize = 4 + // ) + // ), new DBusCachedPlugin( + dBusCmdMasterPipe = true, + dBusCmdSlavePipe = true, + dBusRspSlavePipe = true, config = new DataCacheConfig( - cacheSize = 4096, + cacheSize = 4096*1, bytePerLine = 32, wayCount = 1, addressWidth = 32, @@ -81,33 +100,34 @@ object TestsWorkspace { catchAccessError = true, catchIllegal = true, catchUnaligned = true, - withLrSc = true + withLrSc = true, + withAmo = false, + withSmp = true + // ) ), - // memoryTranslatorPortConfig = null - memoryTranslatorPortConfig = MemoryTranslatorPortConfig( - portTlbSize = 6 + memoryTranslatorPortConfig = MmuPortConfig( + portTlbSize = 4 ) ), - // new StaticMemoryTranslatorPlugin( + + // new MemoryTranslatorPlugin( + // tlbSize = 32, + // virtualRange = _(31 downto 28) === 0xC, // ioRange = _(31 downto 28) === 0xF // ), - new MemoryTranslatorPlugin( - tlbSize = 32, - virtualRange = _(31 downto 28) === 0xC, - ioRange = _(31 downto 28) === 0xF - ), + new DecoderSimplePlugin( catchIllegalInstruction = true ), new RegFilePlugin( - regFileReadyKind = plugin.ASYNC, + regFileReadyKind = plugin.SYNC, zeroBoot = true ), new IntAluPlugin, new SrcPlugin( separatedAddSub = false ), - new FullBarrelShifterPlugin(earlyInjection = true), + new FullBarrelShifterPlugin(earlyInjection = false), // new LightShifterPlugin, new HazardSimplePlugin( bypassExecute = true, @@ -128,7 +148,7 @@ object TestsWorkspace { divUnrollFactor = 1 ), // new DivPlugin, - new CsrPlugin(CsrPluginConfig.all(0x80000020l)), + new 
CsrPlugin(CsrPluginConfig.all2(0x80000020l).copy(ebreakGen = false)), // new CsrPlugin(//CsrPluginConfig.all2(0x80000020l).copy(ebreakGen = true)/* // CsrPluginConfig( // catchIllegalAccess = false, @@ -154,9 +174,9 @@ object TestsWorkspace { // )), new DebugPlugin(ClockDomain.current.clone(reset = Bool().setName("debugReset"))), new BranchPlugin( - earlyBranch = true, + earlyBranch = false, catchAddressMisaligned = true, - fenceiGenAsAJump = true + fenceiGenAsAJump = false ), new YamlPlugin("cpu0.yaml") ) diff --git a/src/main/scala/vexriscv/ip/DataCache.scala b/src/main/scala/vexriscv/ip/DataCache.scala index af6f15c..758186a 100644 --- a/src/main/scala/vexriscv/ip/DataCache.scala +++ b/src/main/scala/vexriscv/ip/DataCache.scala @@ -175,6 +175,7 @@ case class DataCacheMemCmd(p : DataCacheConfig) extends Bundle{ val last = Bool } case class DataCacheMemRsp(p : DataCacheConfig) extends Bundle{ + val last = Bool() val data = Bits(p.memDataWidth bit) val error = Bool val exclusive = p.withSmp generate Bool() @@ -446,15 +447,18 @@ class DataCache(p : DataCacheConfig) extends Component{ io.cpu.execute.haltIt := False val rspSync = True + val rspLast = True + val memCmdSent = RegInit(False) setWhen (io.mem.cmd.ready) clearWhen (!io.cpu.writeBack.isStuck) val pending = withSmp generate new Area{ val counter = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0) - counter := counter + U(io.mem.cmd.fire && io.mem.cmd.last) - U(io.mem.rsp.valid) + counter := counter + U(io.mem.cmd.fire && io.mem.cmd.last) - U(io.mem.rsp.valid && io.mem.rsp.last) val full = RegNext(counter.msb) val last = counter === 1 io.cpu.execute.haltIt setWhen(full) - rspSync clearWhen(!last) + rspSync clearWhen(!last || !memCmdSent) + rspLast clearWhen(!last) } @@ -568,8 +572,15 @@ class DataCache(p : DataCacheConfig) extends Component{ } + val cpuWriteToCache = False + when(cpuWriteToCache){ + dataWriteCmd.valid setWhen(request.wr && waysHit) + dataWriteCmd.address := mmuRsp.physicalAddress(lineRange.high downto wordRange.low) + dataWriteCmd.data := requestDataBypass + dataWriteCmd.mask := mask + dataWriteCmd.way := waysHits + } - val memCmdSent = RegInit(False) setWhen (io.mem.cmd.ready) clearWhen (!io.cpu.writeBack.isStuck) io.cpu.redo := False io.cpu.writeBack.accessError := False io.cpu.writeBack.mmuException := io.cpu.writeBack.isValid && (if(catchIllegal) mmuRsp.exception || (!mmuRsp.allowWrite && request.wr) || (!mmuRsp.allowRead && (!request.wr || isAmo)) else False) @@ -579,14 +590,16 @@ class DataCache(p : DataCacheConfig) extends Component{ io.mem.cmd.valid := False io.mem.cmd.address.assignDontCare() io.mem.cmd.length.assignDontCare() - io.mem.cmd.last.assignDontCare() + io.mem.cmd.last := True io.mem.cmd.wr := request.wr io.mem.cmd.mask := mask io.mem.cmd.data := requestDataBypass if(withExternalLrSc) io.mem.cmd.exclusive := request.isLrsc || (if(withAmo) request.isAmo else False) + val bypassCache = mmuRsp.isIoAccess || (if(withExternalLrSc) request.isLrsc else False) + when(io.cpu.writeBack.isValid) { - when(mmuRsp.isIoAccess || (if(withExternalLrSc) request.isLrsc else False)) { + when(bypassCache) { val waitResponse = !request.wr if(withExternalLrSc) waitResponse setWhen(request.isLrsc) @@ -595,7 +608,6 @@ class DataCache(p : DataCacheConfig) extends Component{ io.mem.cmd.valid := !memCmdSent io.mem.cmd.address := mmuRsp.physicalAddress(tagRange.high downto wordRange.low) @@ U(0, wordRange.low bit) io.mem.cmd.length := 0 - io.mem.cmd.last := True if(withInternalLrSc) when(request.isLrsc && !lrSc.reserved){ 
io.mem.cmd.valid := False @@ -603,18 +615,12 @@ class DataCache(p : DataCacheConfig) extends Component{ } } otherwise { when(waysHit || request.wr && !isAmo) { //Do not require a cache refill ? - //Data cache update - dataWriteCmd.valid setWhen(request.wr && waysHit) - dataWriteCmd.address := mmuRsp.physicalAddress(lineRange.high downto wordRange.low) - dataWriteCmd.data := requestDataBypass - dataWriteCmd.mask := mask - dataWriteCmd.way := waysHits + cpuWriteToCache := True //Write through io.mem.cmd.valid setWhen(request.wr) io.mem.cmd.address := mmuRsp.physicalAddress(tagRange.high downto wordRange.low) @@ U(0, wordRange.low bit) io.mem.cmd.length := 0 - io.mem.cmd.last := True io.cpu.writeBack.haltIt clearWhen(!request.wr || io.mem.cmd.ready) if(withAmo) when(isAmo){ @@ -642,22 +648,29 @@ class DataCache(p : DataCacheConfig) extends Component{ io.mem.cmd.wr := False io.mem.cmd.address := mmuRsp.physicalAddress(tagRange.high downto lineRange.low) @@ U(0,lineRange.low bit) io.mem.cmd.length := p.burstLength-1 - io.mem.cmd.last := True loaderValid setWhen(io.mem.cmd.ready) } } } - when(mmuRsp.isIoAccess){ + when(bypassCache){ io.cpu.writeBack.data := io.mem.rsp.data if(catchAccessError) io.cpu.writeBack.accessError := io.mem.rsp.valid && io.mem.rsp.error } otherwise { io.cpu.writeBack.data := dataMux if(catchAccessError) io.cpu.writeBack.accessError := (waysHits & B(tagsReadRsp.map(_.error))) =/= 0 } + if(withLrSc) when(request.isLrsc && request.wr){ - io.cpu.writeBack.data := B(!lrSc.reserved || !io.mem.rsp.exclusive).resized + val success = CombInit(lrSc.reserved) + if(withExternalLrSc) success clearWhen(!io.mem.rsp.exclusive) + + io.cpu.writeBack.data := B(!success).resized + + if(withExternalLrSc) when(io.cpu.writeBack.isValid && io.mem.rsp.valid && rspSync && success && waysHit){ + cpuWriteToCache := True + } } if(withAmo) when(request.isAmo){ requestDataBypass := internalAmo.resultReg @@ -687,7 +700,7 @@ class DataCache(p : DataCacheConfig) extends Component{ val waysAllocator = Reg(Bits(wayCount bits)) init(1) val error = RegInit(False) - when(valid && io.mem.rsp.valid && rspSync){ + when(valid && io.mem.rsp.valid && rspLast){ dataWriteCmd.valid := True dataWriteCmd.address := baseAddress(lineRange) @@ counter dataWriteCmd.data := io.mem.rsp.data diff --git a/src/test/cpp/raw/lrsc/build/lrsc.asm b/src/test/cpp/raw/lrsc/build/lrsc.asm index 4ff24b4..a2ba4c7 100644 --- a/src/test/cpp/raw/lrsc/build/lrsc.asm +++ b/src/test/cpp/raw/lrsc/build/lrsc.asm @@ -5,7 +5,7 @@ build/lrsc.elf: file format elf32-littleriscv Disassembly of section .crt_section: 80000000 : -80000000: 04c0006f j 8000004c <_start> +80000000: 06c0006f j 8000006c <_start> 80000004: 00000013 nop 80000008: 00000013 nop 8000000c: 00000013 nop @@ -29,115 +29,152 @@ Disassembly of section .crt_section: 80000044: 341e9073 csrw mepc,t4 80000048: 30200073 mret -8000004c <_start>: -8000004c: 00100e13 li t3,1 -80000050: 10000537 lui a0,0x10000 -80000054: 06400593 li a1,100 -80000058: 06500613 li a2,101 -8000005c: 06600693 li a3,102 -80000060: 00d52023 sw a3,0(a0) # 10000000 -80000064: 18b5262f sc.w a2,a1,(a0) -80000068: 00100713 li a4,1 -8000006c: 14e61a63 bne a2,a4,800001c0 -80000070: 00052703 lw a4,0(a0) -80000074: 14e69663 bne a3,a4,800001c0 -80000078: 00200e13 li t3,2 -8000007c: 10000537 lui a0,0x10000 -80000080: 00450513 addi a0,a0,4 # 10000004 -80000084: 06700593 li a1,103 -80000088: 06800613 li a2,104 -8000008c: 06900693 li a3,105 -80000090: 00d52023 sw a3,0(a0) -80000094: 18b5262f sc.w a2,a1,(a0) -80000098: 00100713 li 
a4,1 -8000009c: 12e61263 bne a2,a4,800001c0 -800000a0: 00052703 lw a4,0(a0) -800000a4: 10e69e63 bne a3,a4,800001c0 -800000a8: 00300e13 li t3,3 -800000ac: 10000537 lui a0,0x10000 -800000b0: 00450513 addi a0,a0,4 # 10000004 -800000b4: 06700593 li a1,103 -800000b8: 06800613 li a2,104 -800000bc: 06900693 li a3,105 -800000c0: 18b5262f sc.w a2,a1,(a0) -800000c4: 00100713 li a4,1 -800000c8: 0ee61c63 bne a2,a4,800001c0 +8000004c : +8000004c: 200002b7 lui t0,0x20000 +80000050: 00001337 lui t1,0x1 +80000054: 02000393 li t2,32 + +80000058 : +80000058: 0002ae03 lw t3,0(t0) # 20000000 +8000005c: 006282b3 add t0,t0,t1 +80000060: fff38393 addi t2,t2,-1 +80000064: fe039ae3 bnez t2,80000058 +80000068: 00008067 ret + +8000006c <_start>: +8000006c: 00100e13 li t3,1 +80000070: 10000537 lui a0,0x10000 +80000074: 06400593 li a1,100 +80000078: 06500613 li a2,101 +8000007c: 06600693 li a3,102 +80000080: 00d52023 sw a3,0(a0) # 10000000 +80000084: 18b5262f sc.w a2,a1,(a0) +80000088: 00100713 li a4,1 +8000008c: 18e61863 bne a2,a4,8000021c +80000090: 00052703 lw a4,0(a0) +80000094: 18e69463 bne a3,a4,8000021c + +80000098 : +80000098: 00200e13 li t3,2 +8000009c: 10000537 lui a0,0x10000 +800000a0: 00450513 addi a0,a0,4 # 10000004 +800000a4: 06700593 li a1,103 +800000a8: 06800613 li a2,104 +800000ac: 06900693 li a3,105 +800000b0: 00d52023 sw a3,0(a0) +800000b4: 18b5262f sc.w a2,a1,(a0) +800000b8: 00100713 li a4,1 +800000bc: 16e61063 bne a2,a4,8000021c +800000c0: 00052703 lw a4,0(a0) +800000c4: 14e69c63 bne a3,a4,8000021c +800000c8: f85ff0ef jal ra,8000004c 800000cc: 00052703 lw a4,0(a0) -800000d0: 0ee69863 bne a3,a4,800001c0 -800000d4: 00400e13 li t3,4 +800000d0: 14e69663 bne a3,a4,8000021c + +800000d4 : +800000d4: 00300e13 li t3,3 800000d8: 10000537 lui a0,0x10000 -800000dc: 00850513 addi a0,a0,8 # 10000008 -800000e0: 06a00593 li a1,106 -800000e4: 06b00613 li a2,107 -800000e8: 06c00693 li a3,108 -800000ec: 00d52023 sw a3,0(a0) -800000f0: 100527af lr.w a5,(a0) -800000f4: 18b5262f sc.w a2,a1,(a0) -800000f8: 0cd79463 bne a5,a3,800001c0 -800000fc: 0c061263 bnez a2,800001c0 -80000100: 00052703 lw a4,0(a0) -80000104: 0ae59e63 bne a1,a4,800001c0 -80000108: 00500e13 li t3,5 -8000010c: 10000537 lui a0,0x10000 -80000110: 00850513 addi a0,a0,8 # 10000008 -80000114: 06d00593 li a1,109 -80000118: 06e00613 li a2,110 -8000011c: 06f00693 li a3,111 -80000120: 00d52023 sw a3,0(a0) -80000124: 18b5262f sc.w a2,a1,(a0) -80000128: 08060c63 beqz a2,800001c0 -8000012c: 00052703 lw a4,0(a0) -80000130: 08e69863 bne a3,a4,800001c0 -80000134: 00700e13 li t3,7 -80000138: 10000537 lui a0,0x10000 -8000013c: 01450513 addi a0,a0,20 # 10000014 -80000140: 07800593 li a1,120 -80000144: 07900613 li a2,121 -80000148: 07a00693 li a3,122 -8000014c: 01000e93 li t4,16 +800000dc: 00450513 addi a0,a0,4 # 10000004 +800000e0: 06700593 li a1,103 +800000e4: 06800613 li a2,104 +800000e8: 06900693 li a3,105 +800000ec: 18b5262f sc.w a2,a1,(a0) +800000f0: 00100713 li a4,1 +800000f4: 12e61463 bne a2,a4,8000021c +800000f8: 00052703 lw a4,0(a0) +800000fc: 12e69063 bne a3,a4,8000021c +80000100: f4dff0ef jal ra,8000004c +80000104: 00052703 lw a4,0(a0) +80000108: 10e69a63 bne a3,a4,8000021c -80000150 : -80000150: 00d52023 sw a3,0(a0) -80000154: 100527af lr.w a5,(a0) -80000158: 18b5262f sc.w a2,a1,(a0) -8000015c: 06d79263 bne a5,a3,800001c0 -80000160: 06061063 bnez a2,800001c0 -80000164: 00052703 lw a4,0(a0) -80000168: 04e59c63 bne a1,a4,800001c0 -8000016c: fffe8e93 addi t4,t4,-1 -80000170: 00450513 addi a0,a0,4 -80000174: 00358593 addi a1,a1,3 -80000178: 00360613 addi a2,a2,3 
-8000017c: 00368693 addi a3,a3,3 -80000180: fc0e98e3 bnez t4,80000150 -80000184: 00900e13 li t3,9 +8000010c : +8000010c: 00400e13 li t3,4 +80000110: 10000537 lui a0,0x10000 +80000114: 00850513 addi a0,a0,8 # 10000008 +80000118: 06a00593 li a1,106 +8000011c: 06b00613 li a2,107 +80000120: 06c00693 li a3,108 +80000124: 00d52023 sw a3,0(a0) +80000128: 100527af lr.w a5,(a0) +8000012c: 18b5262f sc.w a2,a1,(a0) +80000130: 0ed79663 bne a5,a3,8000021c +80000134: 0e061463 bnez a2,8000021c +80000138: 00052703 lw a4,0(a0) +8000013c: 0ee59063 bne a1,a4,8000021c +80000140: f0dff0ef jal ra,8000004c +80000144: 00052703 lw a4,0(a0) +80000148: 0ce59a63 bne a1,a4,8000021c + +8000014c : +8000014c: 00500e13 li t3,5 +80000150: 10000537 lui a0,0x10000 +80000154: 00850513 addi a0,a0,8 # 10000008 +80000158: 06d00593 li a1,109 +8000015c: 06e00613 li a2,110 +80000160: 06f00693 li a3,111 +80000164: 00d52023 sw a3,0(a0) +80000168: 18b5262f sc.w a2,a1,(a0) +8000016c: 0a060863 beqz a2,8000021c +80000170: 00052703 lw a4,0(a0) +80000174: 0ae69463 bne a3,a4,8000021c +80000178: ed5ff0ef jal ra,8000004c +8000017c: 00052703 lw a4,0(a0) +80000180: 08e69e63 bne a3,a4,8000021c +80000184: 00700e13 li t3,7 80000188: 10000537 lui a0,0x10000 -8000018c: 10050513 addi a0,a0,256 # 10000100 -80000190: 07b00593 li a1,123 -80000194: 07c00613 li a2,124 -80000198: 07d00693 li a3,125 -8000019c: 00d52023 sw a3,0(a0) -800001a0: 100527af lr.w a5,(a0) -800001a4: 00000073 ecall -800001a8: 18b527af sc.w a5,a1,(a0) -800001ac: 00000713 li a4,0 -800001b0: 00e79863 bne a5,a4,800001c0 +8000018c: 01450513 addi a0,a0,20 # 10000014 +80000190: 07800593 li a1,120 +80000194: 07900613 li a2,121 +80000198: 07a00693 li a3,122 +8000019c: 01000e93 li t4,16 + +800001a0 : +800001a0: 00d52023 sw a3,0(a0) +800001a4: 100527af lr.w a5,(a0) +800001a8: 18b5262f sc.w a2,a1,(a0) +800001ac: 06d79863 bne a5,a3,8000021c +800001b0: 06061663 bnez a2,8000021c 800001b4: 00052703 lw a4,0(a0) -800001b8: 00e59463 bne a1,a4,800001c0 -800001bc: 0100006f j 800001cc +800001b8: 06e59263 bne a1,a4,8000021c +800001bc: fffe8e93 addi t4,t4,-1 +800001c0: 00450513 addi a0,a0,4 +800001c4: 00358593 addi a1,a1,3 +800001c8: 00360613 addi a2,a2,3 +800001cc: 00368693 addi a3,a3,3 +800001d0: fc0e98e3 bnez t4,800001a0 -800001c0 : -800001c0: f0100137 lui sp,0xf0100 -800001c4: f2410113 addi sp,sp,-220 # f00fff24 -800001c8: 01c12023 sw t3,0(sp) +800001d4 : +800001d4: 00900e13 li t3,9 +800001d8: 10000537 lui a0,0x10000 +800001dc: 10050513 addi a0,a0,256 # 10000100 +800001e0: 07b00593 li a1,123 +800001e4: 07c00613 li a2,124 +800001e8: 07d00693 li a3,125 +800001ec: 00d52023 sw a3,0(a0) +800001f0: 100527af lr.w a5,(a0) +800001f4: 00000073 ecall +800001f8: 18b527af sc.w a5,a1,(a0) +800001fc: 00000713 li a4,0 +80000200: 00e79e63 bne a5,a4,8000021c +80000204: 00052703 lw a4,0(a0) +80000208: 00e59a63 bne a1,a4,8000021c +8000020c: e41ff0ef jal ra,8000004c +80000210: 00052703 lw a4,0(a0) +80000214: 00e59463 bne a1,a4,8000021c +80000218: 0100006f j 80000228 -800001cc : -800001cc: f0100137 lui sp,0xf0100 -800001d0: f2010113 addi sp,sp,-224 # f00fff20 -800001d4: 00012023 sw zero,0(sp) -800001d8: 00000013 nop -800001dc: 00000013 nop -800001e0: 00000013 nop -800001e4: 00000013 nop -800001e8: 00000013 nop -800001ec: 00000013 nop +8000021c : +8000021c: f0100137 lui sp,0xf0100 +80000220: f2410113 addi sp,sp,-220 # f00fff24 +80000224: 01c12023 sw t3,0(sp) + +80000228 : +80000228: f0100137 lui sp,0xf0100 +8000022c: f2010113 addi sp,sp,-224 # f00fff20 +80000230: 00012023 sw zero,0(sp) +80000234: 00000013 nop +80000238: 
00000013 nop +8000023c: 00000013 nop +80000240: 00000013 nop +80000244: 00000013 nop +80000248: 00000013 nop diff --git a/src/test/cpp/raw/lrsc/build/lrsc.hex b/src/test/cpp/raw/lrsc/build/lrsc.hex index 1c1cd4c..b0ee273 100644 --- a/src/test/cpp/raw/lrsc/build/lrsc.hex +++ b/src/test/cpp/raw/lrsc/build/lrsc.hex @@ -1,34 +1,40 @@ :0200000480007A -:100000006F00C00413000000130000001300000084 +:100000006F00C00613000000130000001300000082 :100010001300000013000000130000001300000094 :10002000F32E003093FE0E08638A0E00B72E0000F8 :10003000938E0E8073900E3073002030F32E1034A8 -:10004000938E4E0073901E3473002030130E1000F8 -:100050003705001093054006130650069306600608 -:100060002320D5002F26B51813071000631AE614B5 -:10007000032705006396E614130E200037050010D1 -:100080001305450093057006130680069306900637 -:100090002320D5002F26B518130710006312E6128F -:1000A00003270500639EE610130E3000370500108D -:1000B0001305450093057006130680069306900607 -:1000C0002F26B51813071000631CE60E0327050042 -:1000D0006398E60E130E40003705001013058500E7 -:1000E0009305A0061306B0069306C0062320D5008C -:1000F000AF2705102F26B5186394D70C6312060C92 -:1001000003270500639EE50A130E50003705001013 -:10011000130585009305D0061306E0069306F00646 -:100120002320D5002F26B518630C060803270500E9 -:100130006398E608130E700037050010130545019B -:1001400093058007130690079306A007930E0001FE -:100150002320D500AF2705102F26B5186392D706A8 -:100160006310060603270500639CE504938EFEFFDB -:100170001305450093853500130636009386360037 -:10018000E3980EFC130E90003705001013050510C0 -:100190009305B0071306C0079306D0072320D500A8 -:1001A000AF27051073000000AF27B5181307000034 -:1001B0006398E700032705006394E5006F000001E2 -:1001C000370110F0130141F22320C101370110F073 -:1001D000130101F2232001001300000013000000AE -:1001E00013000000130000001300000013000000C3 -:040000058000004C2B +:10004000938E4E0073901E3473002030B702002050 +:10005000371300009303000203AE0200B382620074 +:100060009383F3FFE39A03FE67800000130E1000F2 +:1000700037050010930540061306500693066006E8 +:100080002320D5002F26B518130710006318E61893 +:10009000032705006394E618130E200037050010AF +:1000A0001305450093057006130680069306900617 +:1000B0002320D5002F26B518130710006310E6166D +:1000C00003270500639CE614EFF05FF803270500A3 +:1000D0006396E614130E3000370500101305450033 +:1000E0009305700613068006930690062F26B51812 +:1000F000130710006314E612032705006390E6124D +:10010000EFF0DFF403270500639AE610130E4000BA +:1001100037050010130585009305A0061306B006E9 +:100120009306C0062320D500AF2705102F26B5184B +:100130006396D70E6314060E032705006390E50E41 +:10014000EFF0DFF003270500639AE50C130E500073 +:1001500037050010130585009305D0061306E00649 +:100160009306F0062320D5002F26B5186308060A4B +:10017000032705006394E60AEFF05FED032705000F +:10018000639EE608130E7000370500101305450145 +:1001900093058007130690079306A007930E0001AE +:1001A0002320D500AF2705102F26B5186398D70652 +:1001B00063160606032705006392E506938EFEFF8D +:1001C00013054500938535001306360093863600E7 +:1001D000E3980EFC130E9000370500101305051070 +:1001E0009305B0071306C0079306D0072320D50058 +:1001F000AF27051073000000AF27B51813070000E4 +:10020000639EE70003270500639AE500EFF01FE413 +:10021000032705006394E5006F000001370110F02B +:10022000130141F22320C101370110F0130101F243 +:100230002320010013000000130000001300000041 +:0C02400013000000130000001300000079 +:040000058000006C0B :00000001FF diff --git a/src/test/cpp/raw/lrsc/src/crt.S b/src/test/cpp/raw/lrsc/src/crt.S index 0cddcd3..a19663f 100644 --- a/src/test/cpp/raw/lrsc/src/crt.S +++ b/src/test/cpp/raw/lrsc/src/crt.S @@ -25,8 +25,19 @@ notExternalInterrupt: csrw mepc, 
x29 mret +flush: + li t0, 0x20000000 + li t1, 0x1000 + li t2, 32 +flushLoop: + lw t3, 0(t0) + add t0, t0, t1 + addi t2,t2,-1 + bnez t2, flushLoop + ret + _start: -//Test 1 SC on unreserved area should fail and not write memory +test1: //Test 1 SC on unreserved area should fail and not write memory li x28, 1 li a0, 0x10000000 li a1, 100 @@ -39,7 +50,7 @@ _start: lw a4, 0(a0) bne a3, a4, fail -//Test 2 SC on another unreserved area should fail and not write memory +test2: //Test 2 SC on another unreserved area should fail and not write memory li x28, 2 li a0, 0x10000004 li a1, 103 @@ -51,9 +62,12 @@ _start: bne a2, a4, fail lw a4, 0(a0) bne a3, a4, fail + call flush + lw a4, 0(a0) + bne a3, a4, fail -//Test 3 retrying SC on unreserved area should fail and not write memory +test3: //Test 3 retrying SC on unreserved area should fail and not write memory li x28, 3 li a0, 0x10000004 li a1, 103 @@ -64,9 +78,12 @@ _start: bne a2, a4, fail lw a4, 0(a0) bne a3, a4, fail + call flush + lw a4, 0(a0) + bne a3, a4, fail -//Test 4 SC on reserved area should pass and should be written write memory +test4: //Test 4 SC on reserved area should pass and should be written write memory li x28, 4 li a0, 0x10000008 li a1, 106 @@ -79,9 +96,12 @@ _start: bne a2, x0, fail lw a4, 0(a0) bne a1, a4, fail + call flush + lw a4, 0(a0) + bne a1, a4, fail -//Test 5 redo SC on reserved area should fail +test5: //Test 5 redo SC on reserved area should fail li x28, 5 li a0, 0x10000008 li a1, 109 @@ -92,6 +112,9 @@ _start: beq a2, x0, fail lw a4, 0(a0) bne a3, a4, fail + call flush + lw a4, 0(a0) + bne a3, a4, fail //Test 7 do a lot of allocation to clear the entries @@ -131,7 +154,7 @@ test7: bne a5, a4, fail*/ -//Test 9 SC should pass after a context switching +test9: //Test 9 SC should pass after a context switching li x28, 9 li a0, 0x10000100 li a1, 123 @@ -145,6 +168,9 @@ test7: bne a5, a4, fail lw a4, 0(a0) bne a1, a4, fail + call flush + lw a4, 0(a0) + bne a1, a4, fail diff --git a/src/test/cpp/regression/main.cpp b/src/test/cpp/regression/main.cpp index 9394144..8b959e2 100644 --- a/src/test/cpp/regression/main.cpp +++ b/src/test/cpp/regression/main.cpp @@ -1409,7 +1409,7 @@ public: virtual void fillSimELements(); void dump(int i){ #ifdef TRACE - if(i == TRACE_START && i != 0) cout << "START TRACE" << endl; + if(i == TRACE_START && i != 0) cout << "**" << endl << "**" << endl << "**" << endl << "**" << endl << "**" << endl << "START TRACE" << endl; if(i >= TRACE_START) tfp->dump(i); #endif } @@ -1518,7 +1518,7 @@ public: currentTime = i; #ifdef FLOW_INFO - if(i % 2000000 == 0) cout << "PROGRESS TRACE_START=" << i << endl; + if(i % 2000000 == 0) cout << "**" << endl << "**" << endl << "**" << endl << "**" << endl << "**" << endl << "PROGRESS TRACE_START=" << i << endl; #endif @@ -2314,16 +2314,25 @@ public: #ifdef DBUS_CACHED //#include "VVexRiscv_DataCache.h" +#include + +struct DBusCachedTask{ + uint32_t data; + bool error; + bool last; + bool exclusive; +}; class DBusCached : public SimElement{ public: - uint32_t address; - bool error_next = false; - uint32_t pendingCount = 0; - bool wr; + queue rsps; + + bool reservationValid = false; + uint32_t reservationAddress; Workspace *ws; VVexRiscv* top; + DBusCached(Workspace* ws){ this->ws = ws; this->top = ws->top; @@ -2345,41 +2354,63 @@ public: VL_IN8(io_cpu_execute_args_invalidate,0,0); VL_IN8(io_cpu_execute_args_way,0,0); -// if(top->VexRiscv->dataCache_1->io_cpu_execute_isValid && !top->VexRiscv->dataCache_1->io_cpu_execute_isStuck -// && 
top->VexRiscv->dataCache_1->io_cpu_execute_args_wr){ -// if(top->VexRiscv->dataCache_1->io_cpu_execute_args_address == 0x80025978) -// cout << "WR 0x80025978 = " << hex << setw(8) << top->VexRiscv->dataCache_1->io_cpu_execute_args_data << endl; -// if(top->VexRiscv->dataCache_1->io_cpu_execute_args_address == 0x8002596c) -// cout << "WR 0x8002596c = " << hex << setw(8) << top->VexRiscv->dataCache_1->io_cpu_execute_args_data << endl; -// } if (top->dBus_cmd_valid && top->dBus_cmd_ready) { - if(pendingCount == 0){ - pendingCount = top->dBus_cmd_payload_length+1; - address = top->dBus_cmd_payload_address; - wr = top->dBus_cmd_payload_wr; - } - if(top->dBus_cmd_payload_wr){ - ws->dBusAccess(address,top->dBus_cmd_payload_wr,2,top->dBus_cmd_payload_mask,&top->dBus_cmd_payload_data,&error_next); - address += 4; - pendingCount--; - } + if(top->dBus_cmd_payload_wr){ + #ifndef SMP + bool error; + ws->dBusAccess(top->dBus_cmd_payload_address,1,2,top->dBus_cmd_payload_mask,&top->dBus_cmd_payload_data,&error); + #else + bool cancel = false; + DBusCachedTask rsp; + if(top->dBus_cmd_payload_exclusive){ + bool hit = reservationValid && reservationAddress == top->dBus_cmd_payload_address; + rsp.exclusive = hit; + cancel = !hit; + reservationValid = false; + } + if(!cancel) ws->dBusAccess(top->dBus_cmd_payload_address,1,2,top->dBus_cmd_payload_mask,&top->dBus_cmd_payload_data,&rsp.error); + rsp.last = true; + rsps.push(rsp); + #endif + } else { + for(int beat = 0;beat <= top->dBus_cmd_payload_length;beat++){ + DBusCachedTask rsp; + ws->dBusAccess(top->dBus_cmd_payload_address + beat * 4,0,2,0,&rsp.data,&rsp.error); + rsp.last = beat == top->dBus_cmd_payload_length; + #ifdef SMP + rsp.exclusive = true; + reservationValid = true; + reservationAddress = top->dBus_cmd_payload_address; + #endif + rsps.push(rsp); + } + } } } virtual void postCycle(){ - if(pendingCount != 0 && !wr && (!ws->dStall || VL_RANDOM_I(7) < 100)){ - ws->dBusAccess(address,0,2,0,&top->dBus_rsp_payload_data,&error_next); - top->dBus_rsp_payload_error = error_next; + + if(!rsps.empty() && (!ws->dStall || VL_RANDOM_I(7) < 100)){ + DBusCachedTask rsp = rsps.front(); + rsps.pop(); top->dBus_rsp_valid = 1; - address += 4; - pendingCount--; + top->dBus_rsp_payload_error = rsp.error; + top->dBus_rsp_payload_data = rsp.data; + top->dBus_rsp_payload_last = rsp.last; + #ifdef SMP + top->dBus_rsp_payload_exclusive = rsp.exclusive; + #endif } else{ top->dBus_rsp_valid = 0; top->dBus_rsp_payload_data = VL_RANDOM_I(32); top->dBus_rsp_payload_error = VL_RANDOM_I(1); + top->dBus_rsp_payload_last = VL_RANDOM_I(1); + #ifdef SMP + top->dBus_rsp_payload_exclusive = VL_RANDOM_I(1); + #endif } - top->dBus_cmd_ready = (ws->dStall ? VL_RANDOM_I(7) < 100 : 1) && (pendingCount == 0 || wr); + top->dBus_cmd_ready = (ws->dStall ? VL_RANDOM_I(7) < 100 : 1); } }; #endif diff --git a/src/test/cpp/regression/makefile b/src/test/cpp/regression/makefile index 2278d06..fba824e 100644 --- a/src/test/cpp/regression/makefile +++ b/src/test/cpp/regression/makefile @@ -15,6 +15,7 @@ CSR_SKIP_TEST?=no EBREAK?=no FENCEI?=no MMU?=yes +SMP?=no SEED?=no LRSC?=no AMO?=no @@ -217,6 +218,10 @@ ifeq ($(MMU),yes) ADDCFLAGS += -CFLAGS -DMMU endif +ifeq ($(SMP),yes) + ADDCFLAGS += -CFLAGS -DSMP +endif + ifeq ($(MUL),yes) ADDCFLAGS += -CFLAGS -DMUL endif From ca72a421bec72d7cdf4fbc2e3c8b50c12e65031e Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Sun, 5 Apr 2020 21:45:45 +0200 Subject: [PATCH 05/91] LrSc align software model to the hardware. 
Linux OK --- src/test/cpp/regression/main.cpp | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/src/test/cpp/regression/main.cpp b/src/test/cpp/regression/main.cpp index 8b959e2..62c58f2 100644 --- a/src/test/cpp/regression/main.cpp +++ b/src/test/cpp/regression/main.cpp @@ -376,6 +376,7 @@ public: bool lrscReserved; + uint32_t lrscReservedAddress; RiscvGolden() { pc = 0x80000000; @@ -891,6 +892,7 @@ public: trap(0, 5, address); } else { lrscReserved = true; + lrscReservedAddress = pAddr; rfWrite(rd32, data); pcWrite(pc + 4); } @@ -902,7 +904,7 @@ public: trap(0, 6, address); } else { if(v2p(address, &pAddr, WRITE)){ trap(0, 15, address); return; } - bool hit = lrscReserved; + bool hit = lrscReserved && lrscReservedAddress == pAddr; if(hit){ dWrite(pAddr, 4, i32_rs2); } @@ -1407,7 +1409,7 @@ public: virtual void pass(){ throw success();} virtual void fail(){ throw std::exception();} virtual void fillSimELements(); - void dump(int i){ + void dump(uint64_t i){ #ifdef TRACE if(i == TRACE_START && i != 0) cout << "**" << endl << "**" << endl << "**" << endl << "**" << endl << "**" << endl << "START TRACE" << endl; if(i >= TRACE_START) tfp->dump(i); @@ -2344,16 +2346,6 @@ public: } virtual void preCycle(){ - VL_IN8(io_cpu_execute_isValid,0,0); - VL_IN8(io_cpu_execute_isStuck,0,0); - VL_IN8(io_cpu_execute_args_kind,0,0); - VL_IN8(io_cpu_execute_args_wr,0,0); - VL_IN8(io_cpu_execute_args_size,1,0); - VL_IN8(io_cpu_execute_args_forceUncachedAccess,0,0); - VL_IN8(io_cpu_execute_args_clean,0,0); - VL_IN8(io_cpu_execute_args_invalidate,0,0); - VL_IN8(io_cpu_execute_args_way,0,0); - if (top->dBus_cmd_valid && top->dBus_cmd_ready) { if(top->dBus_cmd_payload_wr){ #ifndef SMP @@ -2378,9 +2370,11 @@ public: ws->dBusAccess(top->dBus_cmd_payload_address + beat * 4,0,2,0,&rsp.data,&rsp.error); rsp.last = beat == top->dBus_cmd_payload_length; #ifdef SMP - rsp.exclusive = true; - reservationValid = true; - reservationAddress = top->dBus_cmd_payload_address; + if(top->dBus_cmd_payload_exclusive){ + rsp.exclusive = true; + reservationValid = true; + reservationAddress = top->dBus_cmd_payload_address; + } #endif rsps.push(rsp); } From a107e45116af7404b2bd94949dee4acc2a57c1a8 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Mon, 6 Apr 2020 06:43:28 +0200 Subject: [PATCH 06/91] fix non smp regression --- src/test/cpp/regression/main.cpp | 6 +++++- src/test/scala/vexriscv/TestIndividualFeatures.scala | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/test/cpp/regression/main.cpp b/src/test/cpp/regression/main.cpp index 62c58f2..ac03373 100644 --- a/src/test/cpp/regression/main.cpp +++ b/src/test/cpp/regression/main.cpp @@ -904,7 +904,11 @@ public: trap(0, 6, address); } else { if(v2p(address, &pAddr, WRITE)){ trap(0, 15, address); return; } - bool hit = lrscReserved && lrscReservedAddress == pAddr; + #ifdef SMP + bool hit = lrscReserved && lrscReservedAddress == pAddr; + #else + bool hit = lrscReserved; + #endif if(hit){ dWrite(pAddr, 4, i32_rs2); } diff --git a/src/test/scala/vexriscv/TestIndividualFeatures.scala b/src/test/scala/vexriscv/TestIndividualFeatures.scala index 9d147bc..8111258 100644 --- a/src/test/scala/vexriscv/TestIndividualFeatures.scala +++ b/src/test/scala/vexriscv/TestIndividualFeatures.scala @@ -716,7 +716,7 @@ class TestIndividualFeatures extends MultithreadedFunSuite { //Test RTL val debug = true - val stdCmd = (s"make run REGRESSION_PATH=../../src/test/cpp/regression VEXRISCV_FILE=VexRiscv.v WITH_USER_IO=no REDO=10 
TRACE=${if(debug) "yes" else "no"} TRACE_START=1000000000000l FLOW_INFO=no STOP_ON_ERROR=no DHRYSTONE=yes COREMARK=${coremarkRegression} THREAD_COUNT=1 ") + s" SEED=${testSeed} " + val stdCmd = (s"make run REGRESSION_PATH=../../src/test/cpp/regression VEXRISCV_FILE=VexRiscv.v WITH_USER_IO=no REDO=10 TRACE=${if(debug) "yes" else "no"} TRACE_START=1000000000ll FLOW_INFO=no STOP_ON_ERROR=no DHRYSTONE=yes COREMARK=${coremarkRegression} THREAD_COUNT=1 ") + s" SEED=${testSeed} " val testCmd = stdCmd + (positionsToApply).map(_.testParam).mkString(" ") println(testCmd) val str = doCmd(testCmd) From a52b833727456d8811c0f2f6c57fd84b58e940d9 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Mon, 6 Apr 2020 21:42:44 +0200 Subject: [PATCH 07/91] fix weird regression testbench memory bug --- src/test/cpp/regression/main.cpp | 4 ++-- src/test/scala/vexriscv/TestIndividualFeatures.scala | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/test/cpp/regression/main.cpp b/src/test/cpp/regression/main.cpp index ac03373..28d22ee 100644 --- a/src/test/cpp/regression/main.cpp +++ b/src/test/cpp/regression/main.cpp @@ -1782,7 +1782,7 @@ public: virtual void dutPutChar(char c){ if(*hit == c) hit++; else hit = target; - if(*hit == NULL) { + if(*hit == 0) { cout << endl << "T=" << i <ws = ws; @@ -2370,7 +2371,6 @@ public: #endif } else { for(int beat = 0;beat <= top->dBus_cmd_payload_length;beat++){ - DBusCachedTask rsp; ws->dBusAccess(top->dBus_cmd_payload_address + beat * 4,0,2,0,&rsp.data,&rsp.error); rsp.last = beat == top->dBus_cmd_payload_length; #ifdef SMP diff --git a/src/test/scala/vexriscv/TestIndividualFeatures.scala b/src/test/scala/vexriscv/TestIndividualFeatures.scala index 8111258..d292203 100644 --- a/src/test/scala/vexriscv/TestIndividualFeatures.scala +++ b/src/test/scala/vexriscv/TestIndividualFeatures.scala @@ -716,7 +716,7 @@ class TestIndividualFeatures extends MultithreadedFunSuite { //Test RTL val debug = true - val stdCmd = (s"make run REGRESSION_PATH=../../src/test/cpp/regression VEXRISCV_FILE=VexRiscv.v WITH_USER_IO=no REDO=10 TRACE=${if(debug) "yes" else "no"} TRACE_START=1000000000ll FLOW_INFO=no STOP_ON_ERROR=no DHRYSTONE=yes COREMARK=${coremarkRegression} THREAD_COUNT=1 ") + s" SEED=${testSeed} " + val stdCmd = (s"make run REGRESSION_PATH=../../src/test/cpp/regression VEXRISCV_FILE=VexRiscv.v WITH_USER_IO=no REDO=10 TRACE=${if(debug) "yes" else "no"} TRACE_START=100000000000ll FLOW_INFO=no STOP_ON_ERROR=no DHRYSTONE=yes COREMARK=${coremarkRegression} THREAD_COUNT=1 ") + s" SEED=${testSeed} " val testCmd = stdCmd + (positionsToApply).map(_.testParam).mkString(" ") println(testCmd) val str = doCmd(testCmd) From 0c8ea4a3681b3f800e01aadb20907b403c6e9dc6 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Tue, 7 Apr 2020 19:18:20 +0200 Subject: [PATCH 08/91] DataCache add invalidation feature --- src/main/scala/vexriscv/TestsWorkspace.scala | 5 +- src/main/scala/vexriscv/ip/DataCache.scala | 110 +++++++++++++++--- .../vexriscv/plugin/DBusCachedPlugin.scala | 3 + src/test/cpp/regression/main.cpp | 10 +- src/test/cpp/regression/makefile | 6 +- 5 files changed, 108 insertions(+), 26 deletions(-) diff --git a/src/main/scala/vexriscv/TestsWorkspace.scala b/src/main/scala/vexriscv/TestsWorkspace.scala index 9c223a7..5177631 100644 --- a/src/main/scala/vexriscv/TestsWorkspace.scala +++ b/src/main/scala/vexriscv/TestsWorkspace.scala @@ -27,7 +27,7 @@ import spinal.lib.bus.avalon.AvalonMM import spinal.lib.eda.altera.{InterruptReceiverTag, ResetEmitterTag} -//make clean all SEED=42 MMU=no 
STOP_ON_ERROR=yes SMP=yes SUPERVISOR=yes REDO=10 DHRYSTONE=no LRSC=yes LINUX_REGRESSION=yes TRACE=yes TRACE_START=1000000000000l FLOW_INFO=no +//make clean all SEED=42 MMU=no STOP_ON_ERROR=yes DBUS_EXCLUSIVE=yes SUPERVISOR=yes REDO=10 DHRYSTONE=no LRSC=yes LINUX_REGRESSION=yes TRACE=yes TRACE_START=1000000000000l FLOW_INFO=no object TestsWorkspace { def main(args: Array[String]) { def configFull = { @@ -102,7 +102,8 @@ object TestsWorkspace { catchUnaligned = true, withLrSc = true, withAmo = false, - withSmp = true + withExclusive = true, + withInvalidate = true // ) ), memoryTranslatorPortConfig = MmuPortConfig( diff --git a/src/main/scala/vexriscv/ip/DataCache.scala b/src/main/scala/vexriscv/ip/DataCache.scala index 758186a..a69f897 100644 --- a/src/main/scala/vexriscv/ip/DataCache.scala +++ b/src/main/scala/vexriscv/ip/DataCache.scala @@ -25,7 +25,8 @@ case class DataCacheConfig(cacheSize : Int, tagSizeShift : Int = 0, //Used to force infering ram withLrSc : Boolean = false, withAmo : Boolean = false, - withSmp : Boolean = false, + withExclusive : Boolean = false, + withInvalidate : Boolean = false, pendingMax : Int = 64, mergeExecuteMemory : Boolean = false){ assert(!(mergeExecuteMemory && (earlyDataMux || earlyWaysHits))) @@ -33,9 +34,9 @@ case class DataCacheConfig(cacheSize : Int, def burstSize = bytePerLine*8/memDataWidth val burstLength = bytePerLine/(memDataWidth/8) def catchSomething = catchUnaligned || catchIllegal || catchAccessError - def withInternalAmo = withAmo && !withSmp - def withInternalLrSc = withLrSc && !withSmp - def withExternalLrSc = withLrSc && withSmp + def withInternalAmo = withAmo && !withExclusive + def withInternalLrSc = withLrSc && !withExclusive + def withExternalLrSc = withLrSc && withExclusive def getAxi4SharedConfig() = Axi4Config( addressWidth = addressWidth, dataWidth = memDataWidth, @@ -171,14 +172,30 @@ case class DataCacheMemCmd(p : DataCacheConfig) extends Bundle{ val data = Bits(p.memDataWidth bits) val mask = Bits(p.memDataWidth/8 bits) val length = UInt(log2Up(p.burstLength) bits) - val exclusive = p.withSmp generate Bool() + val exclusive = p.withExclusive generate Bool() val last = Bool } case class DataCacheMemRsp(p : DataCacheConfig) extends Bundle{ val last = Bool() val data = Bits(p.memDataWidth bit) val error = Bool - val exclusive = p.withSmp generate Bool() + val exclusive = p.withExclusive generate Bool() +} +case class DataCacheInvalidateCmd(p : DataCacheConfig) extends Bundle{ + val address = UInt(p.addressWidth bit) +} +case class DataCacheInvalidateRsp(p : DataCacheConfig) extends Bundle{ + val hit = Bool() +} + +case class DataCacheInvalidateBus(p : DataCacheConfig) extends Bundle with IMasterSlave { + val cmd = Stream(DataCacheInvalidateCmd(p)) + val rsp = Stream(DataCacheInvalidateRsp(p)) + + override def asMaster(): Unit = { + master(cmd) + slave(rsp) + } } case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave{ @@ -353,7 +370,7 @@ class DataCache(p : DataCacheConfig) extends Component{ val io = new Bundle{ val cpu = slave(DataCacheCpuBus(p)) val mem = master(DataCacheMemBus(p)) - // val flushDone = out Bool //It pulse at the same time than the manager.request.fire + val inv = withInvalidate generate slave(DataCacheInvalidateBus(p)) } val haltCpu = False @@ -371,6 +388,7 @@ class DataCache(p : DataCacheConfig) extends Component{ val tagRange = addressWidth-1 downto log2Up(wayLineCount*bytePerLine) val lineRange = tagRange.low-1 downto log2Up(bytePerLine) val wordRange = log2Up(bytePerLine)-1 downto 
log2Up(bytePerWord) + val hitRange = tagRange.high downto lineRange.low class LineInfo() extends Bundle{ @@ -379,6 +397,7 @@ class DataCache(p : DataCacheConfig) extends Component{ } val tagsReadCmd = Flow(UInt(log2Up(wayLineCount) bits)) + val tagsInvReadCmd = withInvalidate generate Flow(UInt(log2Up(wayLineCount) bits)) val tagsWriteCmd = Flow(new Bundle{ val way = Bits(wayCount bits) val address = UInt(log2Up(wayLineCount) bits) @@ -403,6 +422,7 @@ class DataCache(p : DataCacheConfig) extends Component{ //Reads val tagsReadRsp = tags.readSync(tagsReadCmd.payload, tagsReadCmd.valid && !io.cpu.memory.isStuck) + val tagsInvReadRsp = withInvalidate generate tags.readSync(tagsInvReadCmd.payload, tagsReadCmd.valid && !io.cpu.memory.isStuck) val dataReadRsp = data.readSync(dataReadCmd.payload, dataReadCmd.valid && !io.cpu.memory.isStuck) //Writes @@ -449,7 +469,7 @@ class DataCache(p : DataCacheConfig) extends Component{ val rspSync = True val rspLast = True val memCmdSent = RegInit(False) setWhen (io.mem.cmd.ready) clearWhen (!io.cpu.writeBack.isStuck) - val pending = withSmp generate new Area{ + val pending = withExclusive generate new Area{ val counter = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0) counter := counter + U(io.mem.cmd.fire && io.mem.cmd.last) - U(io.mem.rsp.valid && io.mem.rsp.last) @@ -468,7 +488,8 @@ class DataCache(p : DataCacheConfig) extends Component{ U(1) -> B"0011", default -> B"1111" ) |<< io.cpu.execute.address(1 downto 0) - val colisions = collisionProcess(io.cpu.execute.address(lineRange.high downto wordRange.low), mask) + val dataColisions = collisionProcess(io.cpu.execute.address(lineRange.high downto wordRange.low), mask) + val wayInvalidate = B(0, wayCount bits) //Used if invalidate enabled } val stageA = new Area{ @@ -483,11 +504,12 @@ class DataCache(p : DataCacheConfig) extends Component{ val wayHits = earlyWaysHits generate ways.map(way => (io.cpu.memory.mmuBus.rsp.physicalAddress(tagRange) === way.tagsReadRsp.address && way.tagsReadRsp.valid)) val dataMux = earlyDataMux generate MuxOH(wayHits, ways.map(_.dataReadRsp)) - val colisions = if(mergeExecuteMemory){ - stagePipe(stage0.colisions) + val wayInvalidate = stagePipe(stage0. wayInvalidate) + val dataColisions = if(mergeExecuteMemory){ + stagePipe(stage0.dataColisions) } else { //Assume the writeback stage will never be unstall memory acces while memory stage is stalled - stagePipe(stage0.colisions) | collisionProcess(io.cpu.memory.address(lineRange.high downto wordRange.low), mask) + stagePipe(stage0.dataColisions) | collisionProcess(io.cpu.memory.address(lineRange.high downto wordRange.low), mask) } } @@ -499,11 +521,13 @@ class DataCache(p : DataCacheConfig) extends Component{ val mmuRsp = RegNextWhen(io.cpu.memory.mmuBus.rsp, !io.cpu.writeBack.isStuck && !mmuRspFreeze) val tagsReadRsp = ways.map(w => ramPipe(w.tagsReadRsp)) val dataReadRsp = !earlyDataMux generate ways.map(w => ramPipe(w.dataReadRsp)) - val waysHits = if(earlyWaysHits) stagePipe(B(stageA.wayHits)) else B(tagsReadRsp.map(tag => mmuRsp.physicalAddress(tagRange) === tag.address && tag.valid).asBits()) + val wayInvalidate = stagePipe(stageA. 
wayInvalidate) + val dataColisions = stagePipe(stageA.dataColisions) + val waysHitsBeforeInvalidate = if(earlyWaysHits) stagePipe(B(stageA.wayHits)) else B(tagsReadRsp.map(tag => mmuRsp.physicalAddress(tagRange) === tag.address && tag.valid).asBits()) + val waysHits = waysHitsBeforeInvalidate & ~wayInvalidate val waysHit = waysHits.orR val dataMux = if(earlyDataMux) stagePipe(stageA.dataMux) else MuxOH(waysHits, dataReadRsp) val mask = stagePipe(stageA.mask) - val colisions = stagePipe(stageA.colisions) //Loader interface val loaderValid = False @@ -631,8 +655,8 @@ class DataCache(p : DataCacheConfig) extends Component{ } } - //On write to read colisions - when((!request.wr || isAmo) && (colisions & waysHits) =/= 0){ + //On write to read dataColisions + when((!request.wr || isAmo) && (dataColisions & waysHits) =/= 0){ io.cpu.redo := True if(withAmo) io.mem.cmd.valid := False } @@ -699,6 +723,7 @@ class DataCache(p : DataCacheConfig) extends Component{ val counter = Counter(memTransactionPerLine) val waysAllocator = Reg(Bits(wayCount bits)) init(1) val error = RegInit(False) + val kill = False when(valid && io.mem.rsp.valid && rspLast){ dataWriteCmd.valid := True @@ -731,5 +756,58 @@ class DataCache(p : DataCacheConfig) extends Component{ io.cpu.redo setWhen(valid) stageB.mmuRspFreeze setWhen(stageB.loaderValid || valid) + + when(kill){ + valid := False + error := False + tagsWriteCmd.valid := False + counter.clear() + } + } + + val invalidate = withInvalidate generate new Area{ + val loaderReadToWriteConflict = False + val s0 = new Area{ + val input = io.inv.cmd.haltWhen(loaderReadToWriteConflict) + tagsInvReadCmd.valid := input.fire + tagsInvReadCmd.payload := input.address(lineRange) + + val loaderHit = loader.valid && input.address(hitRange) === loader.baseAddress(hitRange) + when(loaderHit){ + loader.kill := True + } + } + val s1 = new Area{ + val input = s0.input.stage() + val loaderValid = RegNextWhen(loader.valid, s0.input.ready) + val loaderWay = RegNextWhen(loader.waysAllocator, s0.input.ready) + val loaderHit = RegNextWhen(s0.loaderHit, s0.input.ready) + + var wayHits = B(ways.map(way => (input.address(tagRange) === way.tagsInvReadRsp.address && way.tagsInvReadRsp.valid))) + + //Handle invalider read during loader write hazard + when(loaderValid && !loaderHit){ + wayHits \= wayHits & ~loaderWay + } + } + val s2 = new Area{ + val input = s1.input.stage() + val wayHits = RegNextWhen(s1.wayHits, s1.input.ready) + val wayHit = wayHits.orR + + when(input.valid) { + stage0.wayInvalidate := wayHits + + when(wayHit) { + tagsWriteCmd.valid := True + tagsWriteCmd.address := input.address(lineRange) + tagsWriteCmd.data.valid := False + tagsWriteCmd.way := wayHits + loaderReadToWriteConflict := input.address(lineRange) === s0.input.address(lineRange) + } + } + io.inv.rsp.arbitrationFrom(input) + io.inv.rsp.hit := wayHit + } } } \ No newline at end of file diff --git a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala index 958d1a8..2764637 100644 --- a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala +++ b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala @@ -31,6 +31,7 @@ class DBusCachedPlugin(val config : DataCacheConfig, assert(!(memoryTranslatorPortConfig != null && config.cacheSize/config.wayCount > 4096), "When the D$ is used with MMU, each way can't be bigger than a page (4096 bytes)") var dBus : DataCacheMemBus = null + var inv : DataCacheInvalidateBus = null var mmuBus : MemoryTranslatorBus = null var exceptionBus 
: Flow[ExceptionCause] = null var privilegeService : PrivilegeService = null @@ -161,6 +162,7 @@ class DBusCachedPlugin(val config : DataCacheConfig, import pipeline.config._ dBus = master(DataCacheMemBus(this.config)).setName("dBus") + inv = withInvalidate generate slave(DataCacheInvalidateBus(this.config)) val cache = new DataCache(this.config.copy( mergeExecuteMemory = writeBack == null @@ -171,6 +173,7 @@ class DBusCachedPlugin(val config : DataCacheConfig, def cmdBuf = optionPipe(dBusCmdSlavePipe, cache.io.mem.cmd)(_.s2mPipe()) dBus.cmd << optionPipe(dBusCmdMasterPipe, cmdBuf)(_.m2sPipe()) cache.io.mem.rsp << optionPipe(dBusRspSlavePipe,dBus.rsp)(_.m2sPipe()) + cache.io.inv <> inv pipeline plug new Area{ //Memory bandwidth counter diff --git a/src/test/cpp/regression/main.cpp b/src/test/cpp/regression/main.cpp index 28d22ee..47a822b 100644 --- a/src/test/cpp/regression/main.cpp +++ b/src/test/cpp/regression/main.cpp @@ -904,7 +904,7 @@ public: trap(0, 6, address); } else { if(v2p(address, &pAddr, WRITE)){ trap(0, 15, address); return; } - #ifdef SMP + #ifdef DBUS_EXCLUSIVE bool hit = lrscReserved && lrscReservedAddress == pAddr; #else bool hit = lrscReserved; @@ -2353,7 +2353,7 @@ public: virtual void preCycle(){ if (top->dBus_cmd_valid && top->dBus_cmd_ready) { if(top->dBus_cmd_payload_wr){ - #ifndef SMP + #ifndef DBUS_EXCLUSIVE bool error; ws->dBusAccess(top->dBus_cmd_payload_address,1,2,top->dBus_cmd_payload_mask,&top->dBus_cmd_payload_data,&error); #else @@ -2373,7 +2373,7 @@ public: for(int beat = 0;beat <= top->dBus_cmd_payload_length;beat++){ ws->dBusAccess(top->dBus_cmd_payload_address + beat * 4,0,2,0,&rsp.data,&rsp.error); rsp.last = beat == top->dBus_cmd_payload_length; - #ifdef SMP + #ifdef DBUS_EXCLUSIVE if(top->dBus_cmd_payload_exclusive){ rsp.exclusive = true; reservationValid = true; @@ -2395,7 +2395,7 @@ public: top->dBus_rsp_payload_error = rsp.error; top->dBus_rsp_payload_data = rsp.data; top->dBus_rsp_payload_last = rsp.last; - #ifdef SMP + #ifdef DBUS_EXCLUSIVE top->dBus_rsp_payload_exclusive = rsp.exclusive; #endif } else{ @@ -2403,7 +2403,7 @@ public: top->dBus_rsp_payload_data = VL_RANDOM_I(32); top->dBus_rsp_payload_error = VL_RANDOM_I(1); top->dBus_rsp_payload_last = VL_RANDOM_I(1); - #ifdef SMP + #ifdef DBUS_EXCLUSIVE top->dBus_rsp_payload_exclusive = VL_RANDOM_I(1); #endif } diff --git a/src/test/cpp/regression/makefile b/src/test/cpp/regression/makefile index fba824e..48e5551 100644 --- a/src/test/cpp/regression/makefile +++ b/src/test/cpp/regression/makefile @@ -15,7 +15,7 @@ CSR_SKIP_TEST?=no EBREAK?=no FENCEI?=no MMU?=yes -SMP?=no +DBUS_EXCLUSIVE?=no SEED?=no LRSC?=no AMO?=no @@ -218,8 +218,8 @@ ifeq ($(MMU),yes) ADDCFLAGS += -CFLAGS -DMMU endif -ifeq ($(SMP),yes) - ADDCFLAGS += -CFLAGS -DSMP +ifeq ($(DBUS_EXCLUSIVE),yes) + ADDCFLAGS += -CFLAGS -DDBUS_EXCLUSIVE endif ifeq ($(MUL),yes) From 9e1817a28034bab4a07782b1320bffe345335e76 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Tue, 7 Apr 2020 20:05:24 +0200 Subject: [PATCH 09/91] fix DataCache for config without invalidation --- src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala index 2764637..7df58ce 100644 --- a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala +++ b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala @@ -173,7 +173,7 @@ class DBusCachedPlugin(val config : DataCacheConfig, def cmdBuf = 
optionPipe(dBusCmdSlavePipe, cache.io.mem.cmd)(_.s2mPipe()) dBus.cmd << optionPipe(dBusCmdMasterPipe, cmdBuf)(_.m2sPipe()) cache.io.mem.rsp << optionPipe(dBusRspSlavePipe,dBus.rsp)(_.m2sPipe()) - cache.io.inv <> inv + if(withInvalidate) cache.io.inv <> inv pipeline plug new Area{ //Memory bandwidth counter From 6922f80a87c140ce1cf7f878eff276d29dc6e6a9 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Wed, 8 Apr 2020 18:12:13 +0200 Subject: [PATCH 10/91] DataCache now implement fence operations --- src/main/scala/vexriscv/ip/DataCache.scala | 10 +++++- .../vexriscv/plugin/DBusCachedPlugin.scala | 35 ++++++++++++++++++- 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/src/main/scala/vexriscv/ip/DataCache.scala b/src/main/scala/vexriscv/ip/DataCache.scala index a69f897..85d7d0c 100644 --- a/src/main/scala/vexriscv/ip/DataCache.scala +++ b/src/main/scala/vexriscv/ip/DataCache.scala @@ -31,6 +31,7 @@ case class DataCacheConfig(cacheSize : Int, mergeExecuteMemory : Boolean = false){ assert(!(mergeExecuteMemory && (earlyDataMux || earlyWaysHits))) assert(!(earlyDataMux && !earlyWaysHits)) + def withWriteResponse = withExclusive def burstSize = bytePerLine*8/memDataWidth val burstLength = bytePerLine/(memDataWidth/8) def catchSomething = catchUnaligned || catchIllegal || catchAccessError @@ -95,9 +96,10 @@ case class DataCacheCpuExecute(p : DataCacheConfig) extends Bundle with IMasterS val address = UInt(p.addressWidth bit) val haltIt = Bool val args = DataCacheCpuExecuteArgs(p) + val fence = Bool() override def asMaster(): Unit = { - out(isValid, args, address) + out(isValid, args, address, fence) in(haltIt) } } @@ -490,6 +492,12 @@ class DataCache(p : DataCacheConfig) extends Component{ ) |<< io.cpu.execute.address(1 downto 0) val dataColisions = collisionProcess(io.cpu.execute.address(lineRange.high downto wordRange.low), mask) val wayInvalidate = B(0, wayCount bits) //Used if invalidate enabled + + if(withWriteResponse) when(io.cpu.execute.fence){ + when(pending.counter =/= 0 || io.cpu.memory.isValid || io.cpu.writeBack.isValid){ + io.cpu.execute.haltIt := True + } + } } val stageA = new Area{ diff --git a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala index 7df58ce..0d7c930 100644 --- a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala +++ b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala @@ -50,6 +50,8 @@ class DBusCachedPlugin(val config : DataCacheConfig, object MEMORY_ADDRESS_LOW extends Stageable(UInt(2 bits)) object MEMORY_LRSC extends Stageable(Bool) object MEMORY_AMO extends Stageable(Bool) + object MEMORY_FENCE extends Stageable(Bool) + object MEMORY_FENCE_DECODED extends Stageable(Bool) object IS_DBUS_SHARING extends Stageable(Bool()) object MEMORY_VIRTUAL_ADDRESS extends Stageable(UInt(32 bits)) @@ -143,7 +145,13 @@ class DBusCachedPlugin(val config : DataCacheConfig, MEMORY_MANAGMENT -> True )) - decoderService.add(FENCE, Nil) + withWriteResponse match { + case false => decoderService.add(FENCE, Nil) + case true => { + decoderService.addDefault(MEMORY_FENCE, False) + decoderService.add(FENCE, List(MEMORY_FENCE -> True)) + } + } mmuBus = pipeline.service(classOf[MemoryTranslator]).newTranslationPort(MemoryTranslatorPort.PRIORITY_DATA ,memoryTranslatorPortConfig) redoBranch = pipeline.service(classOf[JumpService]).createJumpInterface(if(pipeline.writeBack != null) pipeline.writeBack else pipeline.memory) @@ -189,6 +197,30 @@ class DBusCachedPlugin(val config : DataCacheConfig, when(mmuBus.busy && 
arbitration.isValid && input(MEMORY_ENABLE)) { arbitration.haltItself := True } + + case class FenceFlags() extends Bundle { + val SW,SR,SO,SI,PW,PR,PO,PI = Bool() + val FM = Bits(4 bits) + + def SL = SR || SI + def SS = SW || SO + def PL = PR || PI + def PS = PW || PO + } + + val fence = new Area{ + val hazard = False + val ff = input(INSTRUCTION)(31 downto 20).as(FenceFlags()) + if(withWriteResponse){ + hazard setWhen(input(MEMORY_FENCE) && (ff.PS && ff.SL)) //Manage write to read hit ordering (ensure invalidation timings) +// Not required as LR SC AMO naturaly enforce ordering +// when(input(INSTRUCTION)(26 downto 25) =/= 0){ +// if(withLrSc) hazard setWhen(input(MEMORY_LRSC)) +// if(withAmo) hazard setWhen(input(MEMORY_AMO)) +// } + } + insert(MEMORY_FENCE_DECODED) := hazard + } } execute plug new Area { @@ -207,6 +239,7 @@ class DBusCachedPlugin(val config : DataCacheConfig, cache.io.cpu.flush.valid := arbitration.isValid && input(MEMORY_MANAGMENT) + cache.io.cpu.execute.fence := arbitration.isValid && input(MEMORY_FENCE_DECODED) arbitration.haltItself setWhen(cache.io.cpu.flush.isStall || cache.io.cpu.execute.haltIt) if(withLrSc) { From 861df664cf0f6a29196b6642445461fe95dc4283 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Wed, 8 Apr 2020 18:48:01 +0200 Subject: [PATCH 11/91] clean some AMO stuff --- src/main/scala/vexriscv/ip/DataCache.scala | 7 ++++--- src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/main/scala/vexriscv/ip/DataCache.scala b/src/main/scala/vexriscv/ip/DataCache.scala index 85d7d0c..d0a536b 100644 --- a/src/main/scala/vexriscv/ip/DataCache.scala +++ b/src/main/scala/vexriscv/ip/DataCache.scala @@ -629,6 +629,7 @@ class DataCache(p : DataCacheConfig) extends Component{ if(withExternalLrSc) io.mem.cmd.exclusive := request.isLrsc || (if(withAmo) request.isAmo else False) val bypassCache = mmuRsp.isIoAccess || (if(withExternalLrSc) request.isLrsc else False) + val isAmoCached = if(withInternalAmo) isAmo else False when(io.cpu.writeBack.isValid) { when(bypassCache) { @@ -646,7 +647,7 @@ class DataCache(p : DataCacheConfig) extends Component{ io.cpu.writeBack.haltIt := False } } otherwise { - when(waysHit || request.wr && !isAmo) { //Do not require a cache refill ? + when(waysHit || request.wr && !isAmoCached) { //Do not require a cache refill ? 
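        // Editorial note (illustrative, not part of the original patch): this branch is the cached,
        // write-through path. Loads that hit are served from the cache arrays; a store forks a
        // single-beat write-through on io.mem.cmd and the stage is released once it is accepted:
        //   io.cpu.writeBack.haltIt clearWhen(!request.wr || io.mem.cmd.ready)
        // Cached AMOs (isAmoCached) are excluded from this guard on a miss because they must first
        // read the old value, and even on a hit they wait one extra cycle for resultReg to be valid.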
cpuWriteToCache := True //Write through @@ -655,7 +656,7 @@ class DataCache(p : DataCacheConfig) extends Component{ io.mem.cmd.length := 0 io.cpu.writeBack.haltIt clearWhen(!request.wr || io.mem.cmd.ready) - if(withAmo) when(isAmo){ + if(withInternalAmo) when(isAmo){ when(!internalAmo.resultRegValid) { io.mem.cmd.valid := False dataWriteCmd.valid := False @@ -664,7 +665,7 @@ class DataCache(p : DataCacheConfig) extends Component{ } //On write to read dataColisions - when((!request.wr || isAmo) && (dataColisions & waysHits) =/= 0){ + when((!request.wr || isAmoCached) && (dataColisions & waysHits) =/= 0){ io.cpu.redo := True if(withAmo) io.mem.cmd.valid := False } diff --git a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala index 0d7c930..3a13a7c 100644 --- a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala +++ b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala @@ -213,7 +213,7 @@ class DBusCachedPlugin(val config : DataCacheConfig, val ff = input(INSTRUCTION)(31 downto 20).as(FenceFlags()) if(withWriteResponse){ hazard setWhen(input(MEMORY_FENCE) && (ff.PS && ff.SL)) //Manage write to read hit ordering (ensure invalidation timings) -// Not required as LR SC AMO naturaly enforce ordering +// Not required as LR SC AMO emited on the memory bus enforce the ordering, + it bypass the cache // when(input(INSTRUCTION)(26 downto 25) =/= 0){ // if(withLrSc) hazard setWhen(input(MEMORY_LRSC)) // if(withAmo) hazard setWhen(input(MEMORY_AMO)) From 1d0e180e1d9eb611645b00f614639c9dbdef576b Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Thu, 9 Apr 2020 20:11:56 +0200 Subject: [PATCH 12/91] Add GenTwoStage config and UltraScale synthesis --- .../scala/vexriscv/demo/GenTwoStage.scala | 71 ++++++++++++ .../scala/vexriscv/demo/SynthesisBench.scala | 101 ++++++++++++++++-- src/test/scala/vexriscv/DhrystoneBench.scala | 42 +++++++- 3 files changed, 205 insertions(+), 9 deletions(-) create mode 100644 src/main/scala/vexriscv/demo/GenTwoStage.scala diff --git a/src/main/scala/vexriscv/demo/GenTwoStage.scala b/src/main/scala/vexriscv/demo/GenTwoStage.scala new file mode 100644 index 0000000..b3d0804 --- /dev/null +++ b/src/main/scala/vexriscv/demo/GenTwoStage.scala @@ -0,0 +1,71 @@ +package vexriscv.demo + +import spinal.core.SpinalVerilog +import vexriscv.{VexRiscv, VexRiscvConfig, plugin} +import vexriscv.plugin.{BranchPlugin, CsrPlugin, CsrPluginConfig, DBusSimplePlugin, DecoderSimplePlugin, DivPlugin, FullBarrelShifterPlugin, HazardSimplePlugin, IBusSimplePlugin, IntAluPlugin, LightShifterPlugin, MulPlugin, MulSimplePlugin, NONE, RegFilePlugin, SrcPlugin, YamlPlugin} + +object GenTwoStage extends App{ + def cpu(withMulDiv : Boolean, + bypass : Boolean, + barrielShifter : Boolean) = new VexRiscv( + config = VexRiscvConfig( + withMemoryStage = false, + withWriteBackStage = false, + plugins = List( + new IBusSimplePlugin( + resetVector = 0x80000000l, + cmdForkOnSecondStage = false, + cmdForkPersistence = false, + prediction = NONE, + catchAccessFault = false, + compressedGen = false, + injectorStage = false + ), + new DBusSimplePlugin( + catchAddressMisaligned = false, + catchAccessFault = false + ), + new CsrPlugin(CsrPluginConfig.smallest), + new DecoderSimplePlugin( + catchIllegalInstruction = false + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + readInExecute = true, + zeroBoot = true, + x0Init = false + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false, + executeInsertion = true + ), + new HazardSimplePlugin( 
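        // Editorial note: in this two-stage pipeline (no memory or write-back stage) the only useful
        // forwarding paths are from execute and from the register-file write buffer, so the single
        // `bypass` knob drives bypassExecute / bypassWriteBackBuffer below while the memory and
        // write-back bypasses stay false. With bypass = false the hazard unit stalls dependent
        // instructions instead, trading a few interlock cycles for less logic.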
+ bypassExecute = bypass, + bypassMemory = false, + bypassWriteBack = false, + bypassWriteBackBuffer = bypass, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + new BranchPlugin( + earlyBranch = true, + catchAddressMisaligned = false + ), + new YamlPlugin("cpu0.yaml") + ) ++ (if(!withMulDiv) Nil else List( + new MulSimplePlugin, + new DivPlugin + )) ++ List(if(!barrielShifter) + new LightShifterPlugin + else + new FullBarrelShifterPlugin( + earlyInjection = true + ) + ) + ) + ) + + SpinalVerilog(cpu(false,false,false)) +} diff --git a/src/main/scala/vexriscv/demo/SynthesisBench.scala b/src/main/scala/vexriscv/demo/SynthesisBench.scala index a9961a9..3d9dbf9 100644 --- a/src/main/scala/vexriscv/demo/SynthesisBench.scala +++ b/src/main/scala/vexriscv/demo/SynthesisBench.scala @@ -4,6 +4,7 @@ import spinal.core._ import spinal.lib._ import spinal.lib.eda.bench._ import spinal.lib.eda.icestorm.IcestormStdTargets +import spinal.lib.eda.xilinx.VivadoFlow import spinal.lib.io.InOutWrapper import vexriscv.VexRiscv import vexriscv.plugin.DecoderSimplePlugin @@ -49,6 +50,42 @@ object VexRiscvSynthesisBench { // top // } + val twoStage = new Rtl { + override def getName(): String = "VexRiscv two stages" + override def getRtlPath(): String = "VexRiscvTwoStages.v" + SpinalVerilog(wrap(GenTwoStage.cpu( + withMulDiv = false, + bypass = false, + barrielShifter = false + )).setDefinitionName(getRtlPath().split("\\.").head)) + } + val twoStageBarell = new Rtl { + override def getName(): String = "VexRiscv two stages with barriel" + override def getRtlPath(): String = "VexRiscvTwoStagesBar.v" + SpinalVerilog(wrap(GenTwoStage.cpu( + withMulDiv = false, + bypass = true, + barrielShifter = true + )).setDefinitionName(getRtlPath().split("\\.").head)) + } + val twoStageMulDiv = new Rtl { + override def getName(): String = "VexRiscv two stages with Mul Div" + override def getRtlPath(): String = "VexRiscvTwoStagesMD.v" + SpinalVerilog(wrap(GenTwoStage.cpu( + withMulDiv = true, + bypass = false, + barrielShifter = false + )).setDefinitionName(getRtlPath().split("\\.").head)) + } + val twoStageAll = new Rtl { + override def getName(): String = "VexRiscv two stages with Mul Div fast" + override def getRtlPath(): String = "VexRiscvTwoStagesMDfast.v" + SpinalVerilog(wrap(GenTwoStage.cpu( + withMulDiv = true, + bypass = true, + barrielShifter = true + )).setDefinitionName(getRtlPath().split("\\.").head)) + } val smallestNoCsr = new Rtl { override def getName(): String = "VexRiscv smallest no CSR" override def getRtlPath(): String = "VexRiscvSmallestNoCsr.v" @@ -109,13 +146,63 @@ object VexRiscvSynthesisBench { SpinalConfig(inlineRom = true).generateVerilog(wrap(new VexRiscv(LinuxGen.configFull(false, true))).setDefinitionName(getRtlPath().split("\\.").head)) } - val rtls = List(smallestNoCsr, smallest, smallAndProductive, smallAndProductiveWithICache, fullNoMmuNoCache, noCacheNoMmuMaxPerf, fullNoMmuMaxPerf, fullNoMmu, full, linuxBalanced) -// val rtls = List(smallestNoCsr, smallest, smallAndProductive, smallAndProductiveWithICache) - // val rtls = List(smallAndProductive, smallAndProductiveWithICache, fullNoMmuMaxPerf, fullNoMmu, full) -// val rtls = List(smallAndProductive) - - val targets = XilinxStdTargets() ++ AlteraStdTargets() ++ IcestormStdTargets().take(1) - + val rtls = List(twoStage, twoStageBarell, twoStageMulDiv, twoStageAll, smallestNoCsr, smallest, smallAndProductive, smallAndProductiveWithICache, fullNoMmuNoCache, noCacheNoMmuMaxPerf, fullNoMmuMaxPerf, 
fullNoMmu, full, linuxBalanced) +// val rtls = List(twoStage, twoStageBarell, twoStageMulDiv, twoStageAll) +// val rtls = List(smallest) + val targets = XilinxStdTargets() ++ AlteraStdTargets() ++ IcestormStdTargets().take(1) ++ List( + new Target { + override def getFamilyName(): String = "Kintex UltraScale" + override def synthesise(rtl: Rtl, workspace: String): Report = { + VivadoFlow( + frequencyTarget = 50 MHz, + vivadoPath=sys.env.getOrElse("VIVADO_ARTIX_7_BIN", null), + workspacePath=workspace + "_area", + toplevelPath=rtl.getRtlPath(), + family=getFamilyName(), + device="xcku035-fbva900-3-e" + ) + } + }, + new Target { + override def getFamilyName(): String = "Kintex UltraScale" + override def synthesise(rtl: Rtl, workspace: String): Report = { + VivadoFlow( + frequencyTarget = 800 MHz, + vivadoPath=sys.env.getOrElse("VIVADO_ARTIX_7_BIN", null), + workspacePath=workspace + "_fmax", + toplevelPath=rtl.getRtlPath(), + family=getFamilyName(), + device="xcku035-fbva900-3-e" + ) + } + }, + new Target { + override def getFamilyName(): String = "Kintex UltraScale+" + override def synthesise(rtl: Rtl, workspace: String): Report = { + VivadoFlow( + frequencyTarget = 50 MHz, + vivadoPath=sys.env.getOrElse("VIVADO_ARTIX_7_BIN", null), + workspacePath=workspace + "_area", + toplevelPath=rtl.getRtlPath(), + family=getFamilyName(), + device="xcku3p-ffvd900-3-e" + ) + } + }, + new Target { + override def getFamilyName(): String = "Kintex UltraScale+" + override def synthesise(rtl: Rtl, workspace: String): Report = { + VivadoFlow( + frequencyTarget = 800 MHz, + vivadoPath=sys.env.getOrElse("VIVADO_ARTIX_7_BIN", null), + workspacePath=workspace + "_fmax", + toplevelPath=rtl.getRtlPath(), + family=getFamilyName(), + device="xcku3p-ffvd900-3-e" + ) + } + } + ) // val targets = IcestormStdTargets() Bench(rtls, targets) } diff --git a/src/test/scala/vexriscv/DhrystoneBench.scala b/src/test/scala/vexriscv/DhrystoneBench.scala index d23c4e1..39c434a 100644 --- a/src/test/scala/vexriscv/DhrystoneBench.scala +++ b/src/test/scala/vexriscv/DhrystoneBench.scala @@ -2,7 +2,8 @@ package vexriscv import java.io.File -import org.scalatest.{FunSuite} +import org.scalatest.FunSuite +import spinal.core.SpinalVerilog import vexriscv.demo._ import scala.sys.process._ @@ -42,6 +43,43 @@ class DhrystoneBench extends FunSuite{ } + getDmips( + name = "GenTwoStageArty", + gen = SpinalVerilog(GenTwoStage.cpu( + withMulDiv = false, + bypass = false, + barrielShifter = false + )), + testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes" + ) + getDmips( + name = "GenTwoStageBarrielArty", + gen = SpinalVerilog(GenTwoStage.cpu( + withMulDiv = false, + bypass = true, + barrielShifter = true + )), + testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no COREMARK=yes" + ) + getDmips( + name = "GenTwoStageMDArty", + gen = SpinalVerilog(GenTwoStage.cpu( + withMulDiv = true, + bypass = false, + barrielShifter = false + )), + testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=yes DIV=yes COREMARK=yes" + ) + getDmips( + name = "GenTwoStageMDBarrielArty", + gen = SpinalVerilog(GenTwoStage.cpu( + withMulDiv = true, + bypass = true, + barrielShifter = true + )), + testCmd = "make clean run REDO=10 IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=yes DIV=yes COREMARK=yes" + ) + getDmips( name = "GenSmallestNoCsr", gen = GenSmallestNoCsr.main(null), @@ -104,7 +142,7 @@ class 
DhrystoneBench extends FunSuite{ gen = LinuxGen.main(Array.fill[String](0)("")), testCmd = "make clean run IBUS=CACHED DBUS=CACHED DEBUG_PLUGIN=STD DHRYSTONE=yes SUPERVISOR=yes MMU=no CSR=yes CSR_SKIP_TEST=yes COMPRESSED=no MUL=yes DIV=yes LRSC=yes AMO=yes REDO=10 TRACE=no COREMARK=yes LINUX_REGRESSION=no" ) - //make run IBUS=CACHED DBUS=CACHED DEBUG_PLUGIN=STD DHRYSTONE=yess SUPERVISOR=yes CSR=yes COMPRESSED=no MUL=yes DIV=yes LRSC=yes AMO=yes REDO=1 TRACE=no LINUX_REGRESSION=yes SEED=42 +// //make run IBUS=CACHED DBUS=CACHED DEBUG_PLUGIN=STD DHRYSTONE=yess SUPERVISOR=yes CSR=yes COMPRESSED=no MUL=yes DIV=yes LRSC=yes AMO=yes REDO=1 TRACE=no LINUX_REGRESSION=yes SEED=42 test("final_report") { From 296cb44bc446c6f23e1898a927999cede629c260 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Thu, 9 Apr 2020 20:12:37 +0200 Subject: [PATCH 13/91] Add hardware AMO support using LR/SC exclusive --- src/main/scala/vexriscv/TestsWorkspace.scala | 2 +- src/main/scala/vexriscv/ip/DataCache.scala | 109 +++++++++++++----- .../vexriscv/plugin/DBusCachedPlugin.scala | 17 ++- src/test/cpp/regression/main.cpp | 5 +- 4 files changed, 97 insertions(+), 36 deletions(-) diff --git a/src/main/scala/vexriscv/TestsWorkspace.scala b/src/main/scala/vexriscv/TestsWorkspace.scala index 5177631..2daf2f0 100644 --- a/src/main/scala/vexriscv/TestsWorkspace.scala +++ b/src/main/scala/vexriscv/TestsWorkspace.scala @@ -101,7 +101,7 @@ object TestsWorkspace { catchIllegal = true, catchUnaligned = true, withLrSc = true, - withAmo = false, + withAmo = true, withExclusive = true, withInvalidate = true // ) diff --git a/src/main/scala/vexriscv/ip/DataCache.scala b/src/main/scala/vexriscv/ip/DataCache.scala index d0a536b..9f9ad26 100644 --- a/src/main/scala/vexriscv/ip/DataCache.scala +++ b/src/main/scala/vexriscv/ip/DataCache.scala @@ -38,6 +38,7 @@ case class DataCacheConfig(cacheSize : Int, def withInternalAmo = withAmo && !withExclusive def withInternalLrSc = withLrSc && !withExclusive def withExternalLrSc = withLrSc && withExclusive + def withExternalAmo = withAmo && withExclusive def getAxi4SharedConfig() = Axi4Config( addressWidth = addressWidth, dataWidth = memDataWidth, @@ -133,20 +134,21 @@ case class DataCacheCpuMemory(p : DataCacheConfig) extends Bundle with IMasterSl case class DataCacheCpuWriteBack(p : DataCacheConfig) extends Bundle with IMasterSlave{ - val isValid = Bool - val isStuck = Bool - val isUser = Bool - val haltIt = Bool - val isWrite = Bool + val isValid = Bool() + val isStuck = Bool() + val isUser = Bool() + val haltIt = Bool() + val isWrite = Bool() val data = Bits(p.cpuDataWidth bit) val address = UInt(p.addressWidth bit) - val mmuException, unalignedAccess, accessError = Bool + val mmuException, unalignedAccess, accessError = Bool() + val keepMemRspData = Bool() //Used by external AMO to avoid having an internal buffer // val exceptionBus = if(p.catchSomething) Flow(ExceptionCause()) else null override def asMaster(): Unit = { out(isValid,isStuck,isUser, address) - in(haltIt, data, mmuException, unalignedAccess, accessError, isWrite) + in(haltIt, data, mmuException, unalignedAccess, accessError, isWrite, keepMemRspData) } } @@ -364,8 +366,12 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave } +object DataCacheExternalAmoStates extends SpinalEnum{ + val LR_CMD, LR_RSP, SC_CMD, SC_RSP = newElement(); +} -class DataCache(p : DataCacheConfig) extends Component{ +//If external amo, mem rsp should stay +class DataCache(val p : DataCacheConfig) extends Component{ import p._ 
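  // Editorial note (illustrative summary, not part of the original patch): with withExternalAmo,
  // an AMO is decomposed into an exclusive read/write pair driven by the small FSM below:
  //   LR_CMD : issue an exclusive read on io.mem.cmd (wr = False)
  //   LR_RSP : on the last rsp beat, compute amo.result from rf/mem and latch it into resultReg
  //   SC_CMD : issue the exclusive write, with requestDataBypass carrying resultReg
  //   SC_RSP : if io.mem.rsp.exclusive, the store kept its reservation -> also update the cache
  //            line (cpuWriteToCache) and release the stage; otherwise restart from LR_CMD.
  // keepMemRspData holds the registered rsp data stable during SC_RSP, avoiding a second buffer.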
assert(cpuDataWidth == memDataWidth) @@ -572,7 +578,7 @@ class DataCache(p : DataCacheConfig) extends Component{ } - val lrSc = withLrSc generate new Area{ + val lrSc = withInternalLrSc generate new Area{ val reserved = RegInit(False) when(io.cpu.writeBack.isValid && !io.cpu.writeBack.isStuck && request.isLrsc && !io.cpu.redo && !io.cpu.writeBack.mmuException && !io.cpu.writeBack.unalignedAccess && !io.cpu.writeBack.accessError){ @@ -580,12 +586,16 @@ class DataCache(p : DataCacheConfig) extends Component{ } } - val requestDataBypass = CombInit(request.data) val isAmo = if(withAmo) request.isAmo else False - val internalAmo = withInternalAmo generate new Area{ - def rf = request.data - def mem = dataMux + val isAmoCached = if(withInternalAmo) isAmo else False + val isExternalLsrc = if(withExternalLrSc) request.isLrsc else False + val isExternalAmo = if(withExternalAmo) request.isAmo else False + val requestDataBypass = CombInit(request.data) + import DataCacheExternalAmoStates._ + val amo = withAmo generate new Area{ + def rf = request.data + def mem = if(withInternalAmo) dataMux else io.mem.rsp.data val compare = request.amoCtrl.alu.msb val unsigned = request.amoCtrl.alu(2 downto 1) === B"11" val addSub = (rf.asSInt + Mux(compare, ~mem, mem).asSInt + Mux(compare, S(1), S(0))).asBits @@ -599,8 +609,17 @@ class DataCache(p : DataCacheConfig) extends Component{ B"011" -> (rf & mem), default -> (selectRf ? rf | mem) ) - val resultRegValid = RegNext(True) clearWhen(!io.cpu.writeBack.isStuck) - val resultReg = RegNext(result) +// val resultRegValid = RegNext(True) clearWhen(!io.cpu.writeBack.isStuck) +// val resultReg = RegNext(result) + val resultReg = Reg(Bits(32 bits)) + + val internal = withInternalAmo generate new Area{ + val resultRegValid = RegNext(io.cpu.writeBack.isStuck) + resultReg := result + } + val external = !withInternalAmo generate new Area{ + val state = RegInit(LR_CMD) + } } @@ -620,27 +639,58 @@ class DataCache(p : DataCacheConfig) extends Component{ io.cpu.writeBack.isWrite := request.wr io.mem.cmd.valid := False - io.mem.cmd.address.assignDontCare() - io.mem.cmd.length.assignDontCare() + io.mem.cmd.address := mmuRsp.physicalAddress(tagRange.high downto wordRange.low) @@ U(0, wordRange.low bit) + io.mem.cmd.length := 0 io.mem.cmd.last := True io.mem.cmd.wr := request.wr io.mem.cmd.mask := mask io.mem.cmd.data := requestDataBypass - if(withExternalLrSc) io.mem.cmd.exclusive := request.isLrsc || (if(withAmo) request.isAmo else False) + if(withExternalLrSc) io.mem.cmd.exclusive := request.isLrsc || isAmo - val bypassCache = mmuRsp.isIoAccess || (if(withExternalLrSc) request.isLrsc else False) - val isAmoCached = if(withInternalAmo) isAmo else False + val bypassCache = mmuRsp.isIoAccess || isExternalLsrc || isExternalAmo + + io.cpu.writeBack.keepMemRspData := False when(io.cpu.writeBack.isValid) { - when(bypassCache) { + when(isExternalAmo){ + if(withExternalAmo) switch(amo.external.state){ + is(LR_CMD){ + io.mem.cmd.valid := True + io.mem.cmd.wr := False + when(io.mem.cmd.ready) { + amo.external.state := LR_RSP + } + } + is(LR_RSP){ + when(io.mem.rsp.valid && pending.last) { + amo.external.state := SC_CMD + amo.resultReg := amo.result + } + } + is(SC_CMD){ + io.mem.cmd.valid := True + when(io.mem.cmd.ready) { + amo.external.state := SC_RSP + } + } + is(SC_RSP){ + io.cpu.writeBack.keepMemRspData := True + when(io.mem.rsp.valid) { + amo.external.state := LR_CMD + when(io.mem.rsp.exclusive){ //Success + cpuWriteToCache := True + io.cpu.writeBack.haltIt := False + } + } + } + } + 
} elsewhen(mmuRsp.isIoAccess || isExternalLsrc) { val waitResponse = !request.wr if(withExternalLrSc) waitResponse setWhen(request.isLrsc) io.cpu.writeBack.haltIt.clearWhen(waitResponse ? (io.mem.rsp.valid && rspSync) | io.mem.cmd.ready) io.mem.cmd.valid := !memCmdSent - io.mem.cmd.address := mmuRsp.physicalAddress(tagRange.high downto wordRange.low) @@ U(0, wordRange.low bit) - io.mem.cmd.length := 0 if(withInternalLrSc) when(request.isLrsc && !lrSc.reserved){ io.mem.cmd.valid := False @@ -657,7 +707,7 @@ class DataCache(p : DataCacheConfig) extends Component{ io.cpu.writeBack.haltIt clearWhen(!request.wr || io.mem.cmd.ready) if(withInternalAmo) when(isAmo){ - when(!internalAmo.resultRegValid) { + when(!amo.internal.resultRegValid) { io.mem.cmd.valid := False dataWriteCmd.valid := False io.cpu.writeBack.haltIt := True @@ -696,17 +746,14 @@ class DataCache(p : DataCacheConfig) extends Component{ } if(withLrSc) when(request.isLrsc && request.wr){ - val success = CombInit(lrSc.reserved) - if(withExternalLrSc) success clearWhen(!io.mem.rsp.exclusive) - + val success = if(withInternalLrSc)lrSc.reserved else io.mem.rsp.exclusive io.cpu.writeBack.data := B(!success).resized - if(withExternalLrSc) when(io.cpu.writeBack.isValid && io.mem.rsp.valid && rspSync && success && waysHit){ cpuWriteToCache := True } } if(withAmo) when(request.isAmo){ - requestDataBypass := internalAmo.resultReg + requestDataBypass := amo.resultReg } //remove side effects on exceptions @@ -716,13 +763,11 @@ class DataCache(p : DataCacheConfig) extends Component{ dataWriteCmd.valid := False loaderValid := False io.cpu.writeBack.haltIt := False + if(withExternalAmo) amo.external.state := LR_CMD } io.cpu.redo setWhen(io.cpu.writeBack.isValid && mmuRsp.refilling) assert(!(io.cpu.writeBack.isValid && !io.cpu.writeBack.haltIt && io.cpu.writeBack.isStuck), "writeBack stuck by another plugin is not allowed") - - - } val loader = new Area{ diff --git a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala index 3a13a7c..a128b8b 100644 --- a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala +++ b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala @@ -26,7 +26,7 @@ class DBusCachedPlugin(val config : DataCacheConfig, relaxedMemoryTranslationRegister : Boolean = false, csrInfo : Boolean = false) extends Plugin[VexRiscv] with DBusAccessService { import config._ - + assert(!(config.withExternalAmo && !dBusRspSlavePipe)) assert(isPow2(cacheSize)) assert(!(memoryTranslatorPortConfig != null && config.cacheSize/config.wayCount > 4096), "When the D$ is used with MMU, each way can't be bigger than a page (4096 bytes)") @@ -180,7 +180,20 @@ class DBusCachedPlugin(val config : DataCacheConfig, def optionPipe[T](cond : Boolean, on : T)(f : T => T) : T = if(cond) f(on) else on def cmdBuf = optionPipe(dBusCmdSlavePipe, cache.io.mem.cmd)(_.s2mPipe()) dBus.cmd << optionPipe(dBusCmdMasterPipe, cmdBuf)(_.m2sPipe()) - cache.io.mem.rsp << optionPipe(dBusRspSlavePipe,dBus.rsp)(_.m2sPipe()) + cache.io.mem.rsp << (dBusRspSlavePipe match { + case false => dBus.rsp + case true if !withExternalAmo => dBus.rsp.m2sPipe() + case true if withExternalAmo => { + val rsp = Flow (DataCacheMemRsp(cache.p)) + rsp.valid := RegNext(dBus.rsp.valid) + rsp.exclusive := RegNext(dBus.rsp.exclusive) + rsp.error := RegNext(dBus.rsp.error) + rsp.last := RegNext(dBus.rsp.last) + rsp.data := RegNextWhen(dBus.rsp.data, dBus.rsp.valid && !cache.io.cpu.writeBack.keepMemRspData) + rsp + } + }) + if(withInvalidate) 
cache.io.inv <> inv pipeline plug new Area{ diff --git a/src/test/cpp/regression/main.cpp b/src/test/cpp/regression/main.cpp index 47a822b..c4120c9 100644 --- a/src/test/cpp/regression/main.cpp +++ b/src/test/cpp/regression/main.cpp @@ -926,6 +926,10 @@ public: int32_t src = i32_rs2; int32_t readValue; + #ifdef DBUS_EXCLUSIVE + lrscReserved = false; + #endif + uint32_t pAddr; if(v2p(addr, &pAddr, READ_WRITE)){ trap(0, 15, addr); return; } if(dRead(pAddr, 4, (uint32_t*)&readValue)){ @@ -2358,7 +2362,6 @@ public: ws->dBusAccess(top->dBus_cmd_payload_address,1,2,top->dBus_cmd_payload_mask,&top->dBus_cmd_payload_data,&error); #else bool cancel = false; - DBusCachedTask rsp; if(top->dBus_cmd_payload_exclusive){ bool hit = reservationValid && reservationAddress == top->dBus_cmd_payload_address; rsp.exclusive = hit; From f71f360e325719dff08b304fb9f4a4808a642bc8 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Fri, 10 Apr 2020 14:27:39 +0200 Subject: [PATCH 14/91] Add SMP synthesis --- src/main/scala/vexriscv/demo/Linux.scala | 4 +++- src/main/scala/vexriscv/demo/SynthesisBench.scala | 11 +++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/main/scala/vexriscv/demo/Linux.scala b/src/main/scala/vexriscv/demo/Linux.scala index c13ef12..0010fa3 100644 --- a/src/main/scala/vexriscv/demo/Linux.scala +++ b/src/main/scala/vexriscv/demo/Linux.scala @@ -134,7 +134,7 @@ make clean run IBUS=CACHED DBUS=CACHED DEBUG_PLUGIN=STD DHRYSTONE=yes SUPERVISO object LinuxGen { - def configFull(litex : Boolean, withMmu : Boolean) = { + def configFull(litex : Boolean, withMmu : Boolean, withSmp : Boolean = false) = { val config = VexRiscvConfig( plugins = List( //Uncomment the whole IBusSimplePlugin and comment IBusCachedPlugin if you want uncached iBus config @@ -201,6 +201,8 @@ object LinuxGen { catchAccessError = true, catchIllegal = true, catchUnaligned = true, + withExclusive = withSmp, + withInvalidate = withSmp, withLrSc = true, withAmo = true // ) diff --git a/src/main/scala/vexriscv/demo/SynthesisBench.scala b/src/main/scala/vexriscv/demo/SynthesisBench.scala index 3d9dbf9..94d0055 100644 --- a/src/main/scala/vexriscv/demo/SynthesisBench.scala +++ b/src/main/scala/vexriscv/demo/SynthesisBench.scala @@ -146,8 +146,15 @@ object VexRiscvSynthesisBench { SpinalConfig(inlineRom = true).generateVerilog(wrap(new VexRiscv(LinuxGen.configFull(false, true))).setDefinitionName(getRtlPath().split("\\.").head)) } - val rtls = List(twoStage, twoStageBarell, twoStageMulDiv, twoStageAll, smallestNoCsr, smallest, smallAndProductive, smallAndProductiveWithICache, fullNoMmuNoCache, noCacheNoMmuMaxPerf, fullNoMmuMaxPerf, fullNoMmu, full, linuxBalanced) -// val rtls = List(twoStage, twoStageBarell, twoStageMulDiv, twoStageAll) + val linuxBalancedSmp = new Rtl { + override def getName(): String = "VexRiscv linux balanced SMP" + override def getRtlPath(): String = "VexRiscvLinuxBalancedSmp.v" + SpinalConfig(inlineRom = true).generateVerilog(wrap(new VexRiscv(LinuxGen.configFull(false, true, withSmp = true))).setDefinitionName(getRtlPath().split("\\.").head)) + } + + +// val rtls = List(twoStage, twoStageBarell, twoStageMulDiv, twoStageAll, smallestNoCsr, smallest, smallAndProductive, smallAndProductiveWithICache, fullNoMmuNoCache, noCacheNoMmuMaxPerf, fullNoMmuMaxPerf, fullNoMmu, full, linuxBalanced, linuxBalancedSmp) + val rtls = List(linuxBalanced, linuxBalancedSmp) // val rtls = List(smallest) val targets = XilinxStdTargets() ++ AlteraStdTargets() ++ IcestormStdTargets().take(1) ++ List( new Target { From 
0ad0f5ed3f12b0919183b6b9f0d23647803044a3 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Fri, 10 Apr 2020 14:28:16 +0200 Subject: [PATCH 15/91] Add d$ invalidation tests fix d$ invalidation, linux OK --- src/main/scala/vexriscv/TestsWorkspace.scala | 2 +- src/main/scala/vexriscv/ip/DataCache.scala | 46 ++++++++++--------- .../vexriscv/plugin/DBusCachedPlugin.scala | 5 +- src/test/cpp/regression/main.cpp | 39 +++++++++++++++- src/test/cpp/regression/makefile | 7 +++ 5 files changed, 73 insertions(+), 26 deletions(-) diff --git a/src/main/scala/vexriscv/TestsWorkspace.scala b/src/main/scala/vexriscv/TestsWorkspace.scala index 2daf2f0..3cd633b 100644 --- a/src/main/scala/vexriscv/TestsWorkspace.scala +++ b/src/main/scala/vexriscv/TestsWorkspace.scala @@ -27,7 +27,7 @@ import spinal.lib.bus.avalon.AvalonMM import spinal.lib.eda.altera.{InterruptReceiverTag, ResetEmitterTag} -//make clean all SEED=42 MMU=no STOP_ON_ERROR=yes DBUS_EXCLUSIVE=yes SUPERVISOR=yes REDO=10 DHRYSTONE=no LRSC=yes LINUX_REGRESSION=yes TRACE=yes TRACE_START=1000000000000l FLOW_INFO=no +//make clean all SEED=42 MMU=no STOP_ON_ERROR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes SUPERVISOR=yes REDO=0 DHRYSTONE=no LRSC=yes AMO=yes LINUX_REGRESSION=yes TRACE=yes TRACE_START=9546629800l FLOW_INFO=ye object TestsWorkspace { def main(args: Array[String]) { def configFull = { diff --git a/src/main/scala/vexriscv/ip/DataCache.scala b/src/main/scala/vexriscv/ip/DataCache.scala index 9f9ad26..5a7326c 100644 --- a/src/main/scala/vexriscv/ip/DataCache.scala +++ b/src/main/scala/vexriscv/ip/DataCache.scala @@ -581,7 +581,7 @@ class DataCache(val p : DataCacheConfig) extends Component{ val lrSc = withInternalLrSc generate new Area{ val reserved = RegInit(False) when(io.cpu.writeBack.isValid && !io.cpu.writeBack.isStuck && request.isLrsc - && !io.cpu.redo && !io.cpu.writeBack.mmuException && !io.cpu.writeBack.unalignedAccess && !io.cpu.writeBack.accessError){ + && !io.cpu.redo && !io.cpu.writeBack.mmuException && !io.cpu.writeBack.unalignedAccess && !io.cpu.writeBack.accessError){ reserved := !request.wr } } @@ -609,8 +609,8 @@ class DataCache(val p : DataCacheConfig) extends Component{ B"011" -> (rf & mem), default -> (selectRf ? 
rf | mem) ) -// val resultRegValid = RegNext(True) clearWhen(!io.cpu.writeBack.isStuck) -// val resultReg = RegNext(result) + // val resultRegValid = RegNext(True) clearWhen(!io.cpu.writeBack.isStuck) + // val resultReg = RegNext(result) val resultReg = Reg(Bits(32 bits)) val internal = withInternalAmo generate new Area{ @@ -778,6 +778,7 @@ class DataCache(val p : DataCacheConfig) extends Component{ val waysAllocator = Reg(Bits(wayCount bits)) init(1) val error = RegInit(False) val kill = False + val killReg = RegInit(False) setWhen(kill) when(valid && io.mem.rsp.valid && rspLast){ dataWriteCmd.valid := True @@ -789,19 +790,22 @@ class DataCache(val p : DataCacheConfig) extends Component{ counter.increment() } + val done = CombInit(counter.willOverflow) + if(withInvalidate) done setWhen(valid && pending.counter === 0) //Used to solve invalidate write request at the same time - when(counter.willOverflow){ + when(done){ valid := False //Update tags tagsWriteCmd.valid := True tagsWriteCmd.address := baseAddress(lineRange) - tagsWriteCmd.data.valid := True + tagsWriteCmd.data.valid := !(kill || killReg) tagsWriteCmd.data.address := baseAddress(tagRange) - tagsWriteCmd.data.error := error || io.mem.rsp.error + tagsWriteCmd.data.error := error || (io.mem.rsp.valid && io.mem.rsp.error) tagsWriteCmd.way := waysAllocator error := False + killReg := False } when(!valid){ @@ -810,24 +814,18 @@ class DataCache(val p : DataCacheConfig) extends Component{ io.cpu.redo setWhen(valid) stageB.mmuRspFreeze setWhen(stageB.loaderValid || valid) - - when(kill){ - valid := False - error := False - tagsWriteCmd.valid := False - counter.clear() - } } val invalidate = withInvalidate generate new Area{ - val loaderReadToWriteConflict = False + val readToWriteConflict = False val s0 = new Area{ - val input = io.inv.cmd.haltWhen(loaderReadToWriteConflict) + val input = io.inv.cmd.haltWhen(readToWriteConflict) tagsInvReadCmd.valid := input.fire tagsInvReadCmd.payload := input.address(lineRange) - val loaderHit = loader.valid && input.address(hitRange) === loader.baseAddress(hitRange) - when(loaderHit){ + val loaderTagHit = input.address(tagRange) === loader.baseAddress(tagRange) + val loaderLineHit = input.address(lineRange) === loader.baseAddress(lineRange) + when(input.valid && loader.valid && loaderLineHit && loaderTagHit){ loader.kill := True } } @@ -835,12 +833,13 @@ class DataCache(val p : DataCacheConfig) extends Component{ val input = s0.input.stage() val loaderValid = RegNextWhen(loader.valid, s0.input.ready) val loaderWay = RegNextWhen(loader.waysAllocator, s0.input.ready) - val loaderHit = RegNextWhen(s0.loaderHit, s0.input.ready) + val loaderTagHit = RegNextWhen(s0.loaderTagHit, s0.input.ready) + val loaderLineHit = RegNextWhen(s0.loaderLineHit, s0.input.ready) var wayHits = B(ways.map(way => (input.address(tagRange) === way.tagsInvReadRsp.address && way.tagsInvReadRsp.valid))) //Handle invalider read during loader write hazard - when(loaderValid && !loaderHit){ + when(loaderValid && loaderLineHit && !loaderTagHit){ wayHits \= wayHits & ~loaderWay } } @@ -850,14 +849,19 @@ class DataCache(val p : DataCacheConfig) extends Component{ val wayHit = wayHits.orR when(input.valid) { - stage0.wayInvalidate := wayHits + //Manage invalidate write during cpu read hazard + when(input.address(lineRange) === io.cpu.execute.address(lineRange)) { + stage0.wayInvalidate := wayHits + } + //Invalidate cache tag when(wayHit) { tagsWriteCmd.valid := True tagsWriteCmd.address := input.address(lineRange) tagsWriteCmd.data.valid 
:= False tagsWriteCmd.way := wayHits - loaderReadToWriteConflict := input.address(lineRange) === s0.input.address(lineRange) + readToWriteConflict := input.address(lineRange) === s0.input.address(lineRange) + loader.done := False //Hold loader tags write } } io.inv.rsp.arbitrationFrom(input) diff --git a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala index a128b8b..ce3f81c 100644 --- a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala +++ b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala @@ -59,6 +59,9 @@ class DBusCachedPlugin(val config : DataCacheConfig, import Riscv._ import pipeline.config._ + dBus = master(DataCacheMemBus(this.config)).setName("dBus") + inv = withInvalidate generate slave(DataCacheInvalidateBus(this.config)).setName("dBus_inv") + val decoderService = pipeline.service(classOf[DecoderService]) val stdActions = List[(Stageable[_ <: BaseType],Any)]( @@ -169,8 +172,6 @@ class DBusCachedPlugin(val config : DataCacheConfig, import pipeline._ import pipeline.config._ - dBus = master(DataCacheMemBus(this.config)).setName("dBus") - inv = withInvalidate generate slave(DataCacheInvalidateBus(this.config)) val cache = new DataCache(this.config.copy( mergeExecuteMemory = writeBack == null diff --git a/src/test/cpp/regression/main.cpp b/src/test/cpp/regression/main.cpp index c4120c9..0307778 100644 --- a/src/test/cpp/regression/main.cpp +++ b/src/test/cpp/regression/main.cpp @@ -1106,6 +1106,7 @@ public: #ifdef TRACE VerilatedVcdC* tfp; #endif + bool allowInvalidate = true; uint32_t seed; @@ -1305,6 +1306,14 @@ public: return this; } + Workspace* withInvalidation(){ + allowInvalidate = true; + return this; + } + Workspace* withoutInvalidation(){ + allowInvalidate = false; + return this; + } virtual bool isPerifRegion(uint32_t addr) { return false; } virtual bool isMmuRegion(uint32_t addr) { return true;} virtual void iBusAccess(uint32_t addr, uint32_t *data, bool *error) { @@ -1777,7 +1786,8 @@ public: uint32_t regFileWriteRefIndex = 0; - char *target = "PROJECT EXECUTION SUCCESSFUL", *hit = target; + const char *target = "PROJECT EXECUTION SUCCESSFUL"; + const char *hit = target; ZephyrRegression(string name) : WorkspaceRegression(name) { cout << endl << endl; @@ -2336,6 +2346,7 @@ struct DBusCachedTask{ class DBusCached : public SimElement{ public: queue rsps; + queue invalidationHint; bool reservationValid = false; uint32_t reservationAddress; @@ -2385,6 +2396,14 @@ public: #endif rsps.push(rsp); } + + #ifdef DBUS_INVALIDATE + if(ws->allowInvalidate){ + if(VL_RANDOM_I(7) < 100){ + invalidationHint.push(top->dBus_cmd_payload_address + VL_RANDOM_I(5)); + } + } + #endif } } } @@ -2410,8 +2429,24 @@ public: top->dBus_rsp_payload_exclusive = VL_RANDOM_I(1); #endif } - top->dBus_cmd_ready = (ws->dStall ? VL_RANDOM_I(7) < 100 : 1); + + #ifdef DBUS_INVALIDATE + if(ws->allowInvalidate){ + if(top->dBus_inv_cmd_ready) top->dBus_inv_cmd_valid = 0; + if(top->dBus_inv_cmd_valid == 0 && VL_RANDOM_I(7) < 10){ + top->dBus_inv_cmd_valid = invalidationHint.empty() == 0; + if(!invalidationHint.empty()){ + top->dBus_inv_cmd_payload_address = invalidationHint.front(); + invalidationHint.pop(); + } else { + top->dBus_inv_cmd_payload_address = VL_RANDOM_I(32); + } + } + } + top->dBus_inv_rsp_ready = (ws->dStall ? 
VL_RANDOM_I(7) < 100 : 1); + #endif + } }; #endif diff --git a/src/test/cpp/regression/makefile b/src/test/cpp/regression/makefile index 48e5551..160707a 100644 --- a/src/test/cpp/regression/makefile +++ b/src/test/cpp/regression/makefile @@ -16,6 +16,7 @@ EBREAK?=no FENCEI?=no MMU?=yes DBUS_EXCLUSIVE?=no +DBUS_INVALIDATE?=no SEED?=no LRSC?=no AMO?=no @@ -45,6 +46,9 @@ ADDCFLAGS += -CFLAGS -DIBUS_${IBUS} ADDCFLAGS += -CFLAGS -DDBUS_${DBUS} ADDCFLAGS += -CFLAGS -DREDO=${REDO} ADDCFLAGS += -CFLAGS -pthread +ADDCFLAGS += -CFLAGS -Wno-unused-result + + ADDCFLAGS += -CFLAGS -DTHREAD_COUNT=${THREAD_COUNT} @@ -221,6 +225,9 @@ endif ifeq ($(DBUS_EXCLUSIVE),yes) ADDCFLAGS += -CFLAGS -DDBUS_EXCLUSIVE endif +ifeq ($(DBUS_INVALIDATE),yes) + ADDCFLAGS += -CFLAGS -DDBUS_INVALIDATE +endif ifeq ($(MUL),yes) ADDCFLAGS += -CFLAGS -DMUL From 4a9b8c1f724a359167e35c92d5978a46622434ca Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Fri, 10 Apr 2020 14:44:28 +0200 Subject: [PATCH 16/91] improve invalidation read during write hazard logic --- src/main/scala/vexriscv/ip/DataCache.scala | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/main/scala/vexriscv/ip/DataCache.scala b/src/main/scala/vexriscv/ip/DataCache.scala index 5a7326c..9a4dfc8 100644 --- a/src/main/scala/vexriscv/ip/DataCache.scala +++ b/src/main/scala/vexriscv/ip/DataCache.scala @@ -817,9 +817,8 @@ class DataCache(val p : DataCacheConfig) extends Component{ } val invalidate = withInvalidate generate new Area{ - val readToWriteConflict = False val s0 = new Area{ - val input = io.inv.cmd.haltWhen(readToWriteConflict) + val input = io.inv.cmd tagsInvReadCmd.valid := input.fire tagsInvReadCmd.payload := input.address(lineRange) @@ -835,8 +834,9 @@ class DataCache(val p : DataCacheConfig) extends Component{ val loaderWay = RegNextWhen(loader.waysAllocator, s0.input.ready) val loaderTagHit = RegNextWhen(s0.loaderTagHit, s0.input.ready) val loaderLineHit = RegNextWhen(s0.loaderLineHit, s0.input.ready) + val invalidations = Bits(wayCount bits) - var wayHits = B(ways.map(way => (input.address(tagRange) === way.tagsInvReadRsp.address && way.tagsInvReadRsp.valid))) + var wayHits = B(ways.map(way => (input.address(tagRange) === way.tagsInvReadRsp.address && way.tagsInvReadRsp.valid))) & ~invalidations //Handle invalider read during loader write hazard when(loaderValid && loaderLineHit && !loaderTagHit){ @@ -860,12 +860,14 @@ class DataCache(val p : DataCacheConfig) extends Component{ tagsWriteCmd.address := input.address(lineRange) tagsWriteCmd.data.valid := False tagsWriteCmd.way := wayHits - readToWriteConflict := input.address(lineRange) === s0.input.address(lineRange) loader.done := False //Hold loader tags write } } io.inv.rsp.arbitrationFrom(input) io.inv.rsp.hit := wayHit + + //Manage invalidation read during write hazard + s1.invalidations := RegNext(input.valid ? 
wayHits | 0) } } } \ No newline at end of file From abbfaf6bcf6afeb355d915881c20c386eeae23df Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Fri, 10 Apr 2020 18:58:03 +0200 Subject: [PATCH 17/91] regression : restore normal invalidation setup --- src/test/cpp/regression/main.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/test/cpp/regression/main.cpp b/src/test/cpp/regression/main.cpp index 0307778..3b5122b 100644 --- a/src/test/cpp/regression/main.cpp +++ b/src/test/cpp/regression/main.cpp @@ -2399,7 +2399,7 @@ public: #ifdef DBUS_INVALIDATE if(ws->allowInvalidate){ - if(VL_RANDOM_I(7) < 100){ + if(VL_RANDOM_I(7) < 10){ invalidationHint.push(top->dBus_cmd_payload_address + VL_RANDOM_I(5)); } } @@ -2434,8 +2434,8 @@ public: #ifdef DBUS_INVALIDATE if(ws->allowInvalidate){ if(top->dBus_inv_cmd_ready) top->dBus_inv_cmd_valid = 0; - if(top->dBus_inv_cmd_valid == 0 && VL_RANDOM_I(7) < 10){ - top->dBus_inv_cmd_valid = invalidationHint.empty() == 0; + if(top->dBus_inv_cmd_valid == 0 && VL_RANDOM_I(7) < 5){ + top->dBus_inv_cmd_valid = 1; if(!invalidationHint.empty()){ top->dBus_inv_cmd_payload_address = invalidationHint.front(); invalidationHint.pop(); From 467a2bc488a248fd818960a282dd9e3336be5bc3 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Sat, 11 Apr 2020 19:06:22 +0200 Subject: [PATCH 18/91] refactor DBus invalidation, and add invalidation enable --- src/main/scala/vexriscv/ip/DataCache.scala | 48 +++++++++++-------- .../vexriscv/plugin/DBusCachedPlugin.scala | 7 +-- src/test/cpp/regression/main.cpp | 13 ++--- 3 files changed, 40 insertions(+), 28 deletions(-) diff --git a/src/main/scala/vexriscv/ip/DataCache.scala b/src/main/scala/vexriscv/ip/DataCache.scala index 9a4dfc8..ec6ad57 100644 --- a/src/main/scala/vexriscv/ip/DataCache.scala +++ b/src/main/scala/vexriscv/ip/DataCache.scala @@ -185,30 +185,29 @@ case class DataCacheMemRsp(p : DataCacheConfig) extends Bundle{ val error = Bool val exclusive = p.withExclusive generate Bool() } -case class DataCacheInvalidateCmd(p : DataCacheConfig) extends Bundle{ +case class DataCacheInv(p : DataCacheConfig) extends Bundle{ + val enable = Bool() val address = UInt(p.addressWidth bit) } -case class DataCacheInvalidateRsp(p : DataCacheConfig) extends Bundle{ +case class DataCacheAck(p : DataCacheConfig) extends Bundle{ val hit = Bool() } -case class DataCacheInvalidateBus(p : DataCacheConfig) extends Bundle with IMasterSlave { - val cmd = Stream(DataCacheInvalidateCmd(p)) - val rsp = Stream(DataCacheInvalidateRsp(p)) - - override def asMaster(): Unit = { - master(cmd) - slave(rsp) - } -} - case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave{ val cmd = Stream (DataCacheMemCmd(p)) val rsp = Flow (DataCacheMemRsp(p)) + val inv = p.withInvalidate generate Stream(DataCacheInv(p)) + val ack = p.withInvalidate generate Stream(DataCacheAck(p)) + override def asMaster(): Unit = { master(cmd) slave(rsp) + + if(p.withInvalidate) { + slave(inv) + master(ack) + } } def toAxi4Shared(stageCmd : Boolean = false, pendingWritesMax : Int = 7): Axi4Shared = { @@ -353,14 +352,26 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave bus.cmd.data := cmd.data bus.cmd.length := (cmd.length << 2) | 3 //TODO better sub word access bus.cmd.mask := cmd.mask + if(p.withExclusive) bus.cmd.exclusive := cmd.exclusive cmd.ready := bus.cmd.ready rsp.valid := bus.rsp.valid && !bus.rsp.context(0) rsp.data := bus.rsp.data rsp.error := bus.rsp.isError + if(p.withExclusive) rsp.exclusive := 
bus.rsp.exclusive bus.rsp.ready := True + if(p.withInvalidate){ + bus.ack.arbitrationFrom(ack) + //TODO manage lenght ? + inv.address := bus.inv.address +// inv.opcode := bus.inv.opcode + ??? + + bus.ack.arbitrationFrom(ack) + } + bus } @@ -378,7 +389,6 @@ class DataCache(val p : DataCacheConfig) extends Component{ val io = new Bundle{ val cpu = slave(DataCacheCpuBus(p)) val mem = master(DataCacheMemBus(p)) - val inv = withInvalidate generate slave(DataCacheInvalidateBus(p)) } val haltCpu = False @@ -818,13 +828,13 @@ class DataCache(val p : DataCacheConfig) extends Component{ val invalidate = withInvalidate generate new Area{ val s0 = new Area{ - val input = io.inv.cmd + val input = io.mem.inv tagsInvReadCmd.valid := input.fire tagsInvReadCmd.payload := input.address(lineRange) val loaderTagHit = input.address(tagRange) === loader.baseAddress(tagRange) val loaderLineHit = input.address(lineRange) === loader.baseAddress(lineRange) - when(input.valid && loader.valid && loaderLineHit && loaderTagHit){ + when(input.valid && input.enable && loader.valid && loaderLineHit && loaderTagHit){ loader.kill := True } } @@ -848,7 +858,7 @@ class DataCache(val p : DataCacheConfig) extends Component{ val wayHits = RegNextWhen(s1.wayHits, s1.input.ready) val wayHit = wayHits.orR - when(input.valid) { + when(input.valid && input.enable) { //Manage invalidate write during cpu read hazard when(input.address(lineRange) === io.cpu.execute.address(lineRange)) { stage0.wayInvalidate := wayHits @@ -863,11 +873,11 @@ class DataCache(val p : DataCacheConfig) extends Component{ loader.done := False //Hold loader tags write } } - io.inv.rsp.arbitrationFrom(input) - io.inv.rsp.hit := wayHit + io.mem.ack.arbitrationFrom(input) + io.mem.ack.hit := wayHit //Manage invalidation read during write hazard - s1.invalidations := RegNext(input.valid ? wayHits | 0) + s1.invalidations := RegNext((input.valid && input.enable) ? 
wayHits | 0) } } } \ No newline at end of file diff --git a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala index ce3f81c..c27cae7 100644 --- a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala +++ b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala @@ -31,7 +31,6 @@ class DBusCachedPlugin(val config : DataCacheConfig, assert(!(memoryTranslatorPortConfig != null && config.cacheSize/config.wayCount > 4096), "When the D$ is used with MMU, each way can't be bigger than a page (4096 bytes)") var dBus : DataCacheMemBus = null - var inv : DataCacheInvalidateBus = null var mmuBus : MemoryTranslatorBus = null var exceptionBus : Flow[ExceptionCause] = null var privilegeService : PrivilegeService = null @@ -60,7 +59,6 @@ class DBusCachedPlugin(val config : DataCacheConfig, import pipeline.config._ dBus = master(DataCacheMemBus(this.config)).setName("dBus") - inv = withInvalidate generate slave(DataCacheInvalidateBus(this.config)).setName("dBus_inv") val decoderService = pipeline.service(classOf[DecoderService]) @@ -195,7 +193,10 @@ class DBusCachedPlugin(val config : DataCacheConfig, } }) - if(withInvalidate) cache.io.inv <> inv + if(withInvalidate) { + cache.io.mem.inv << dBus.inv + cache.io.mem.ack >> dBus.ack + } pipeline plug new Area{ //Memory bandwidth counter diff --git a/src/test/cpp/regression/main.cpp b/src/test/cpp/regression/main.cpp index 3b5122b..6070183 100644 --- a/src/test/cpp/regression/main.cpp +++ b/src/test/cpp/regression/main.cpp @@ -2433,18 +2433,19 @@ public: #ifdef DBUS_INVALIDATE if(ws->allowInvalidate){ - if(top->dBus_inv_cmd_ready) top->dBus_inv_cmd_valid = 0; - if(top->dBus_inv_cmd_valid == 0 && VL_RANDOM_I(7) < 5){ - top->dBus_inv_cmd_valid = 1; + if(top->dBus_inv_ready) top->dBus_inv_valid = 0; + if(top->dBus_inv_valid == 0 && VL_RANDOM_I(7) < 5){ + top->dBus_inv_valid = 1; + top->dBus_inv_payload_enable = VL_RANDOM_I(7) < 100; if(!invalidationHint.empty()){ - top->dBus_inv_cmd_payload_address = invalidationHint.front(); + top->dBus_inv_payload_address = invalidationHint.front(); invalidationHint.pop(); } else { - top->dBus_inv_cmd_payload_address = VL_RANDOM_I(32); + top->dBus_inv_payload_address = VL_RANDOM_I(32); } } } - top->dBus_inv_rsp_ready = (ws->dStall ? VL_RANDOM_I(7) < 100 : 1); + top->dBus_ack_ready = (ws->dStall ? 
VL_RANDOM_I(7) < 100 : 1); #endif } From 46207abbc42ddce91a49bc04288e0cc700220a60 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Thu, 16 Apr 2020 01:28:38 +0200 Subject: [PATCH 19/91] dataCache now implement invalidation sync --- src/main/scala/vexriscv/ip/DataCache.scala | 81 ++++++++++++++----- .../vexriscv/plugin/DBusCachedPlugin.scala | 7 +- 2 files changed, 66 insertions(+), 22 deletions(-) diff --git a/src/main/scala/vexriscv/ip/DataCache.scala b/src/main/scala/vexriscv/ip/DataCache.scala index ec6ad57..8005440 100644 --- a/src/main/scala/vexriscv/ip/DataCache.scala +++ b/src/main/scala/vexriscv/ip/DataCache.scala @@ -80,11 +80,16 @@ case class DataCacheConfig(cacheSize : Int, dataWidth = 32, lengthWidth = log2Up(this.bytePerLine), sourceWidth = 0, - contextWidth = 1, + contextWidth = if(!withWriteResponse) 1 else 0, canRead = true, canWrite = true, alignment = BmbParameter.BurstAlignement.LENGTH, - maximumPendingTransactionPerId = Int.MaxValue + maximumPendingTransactionPerId = Int.MaxValue, + canInvalidate = withInvalidate, + canSync = withInvalidate, + canExclusive = withExclusive, + invalidateLength = log2Up(this.bytePerLine), + invalidateAlignment = BmbParameter.BurstAlignement.LENGTH ) } @@ -193,12 +198,17 @@ case class DataCacheAck(p : DataCacheConfig) extends Bundle{ val hit = Bool() } +case class DataCacheSync(p : DataCacheConfig) extends Bundle{ + +} + case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave{ val cmd = Stream (DataCacheMemCmd(p)) val rsp = Flow (DataCacheMemRsp(p)) val inv = p.withInvalidate generate Stream(DataCacheInv(p)) val ack = p.withInvalidate generate Stream(DataCacheAck(p)) + val sync = p.withInvalidate generate Stream(DataCacheSync(p)) override def asMaster(): Unit = { master(cmd) @@ -207,6 +217,7 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave if(p.withInvalidate) { slave(inv) master(ack) + slave(sync) } } @@ -342,11 +353,11 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave def toBmb() : Bmb = { val pipelinedMemoryBusConfig = p.getBmbParameter() - val bus = Bmb(pipelinedMemoryBusConfig) + val bus = Bmb(pipelinedMemoryBusConfig).setCompositeName(this,"toBmb", true) bus.cmd.valid := cmd.valid bus.cmd.last := cmd.last - bus.cmd.context(0) := cmd.wr + if(!p.withWriteResponse) bus.cmd.context(0) := cmd.wr bus.cmd.opcode := (cmd.wr ? B(Bmb.Cmd.Opcode.WRITE) | B(Bmb.Cmd.Opcode.READ)) bus.cmd.address := cmd.address.resized bus.cmd.data := cmd.data @@ -356,22 +367,33 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave cmd.ready := bus.cmd.ready - rsp.valid := bus.rsp.valid && !bus.rsp.context(0) + rsp.valid := bus.rsp.valid + if(!p.withWriteResponse) rsp.valid clearWhen(bus.rsp.context(0)) rsp.data := bus.rsp.data rsp.error := bus.rsp.isError + rsp.last := bus.rsp.last if(p.withExclusive) rsp.exclusive := bus.rsp.exclusive bus.rsp.ready := True if(p.withInvalidate){ - bus.ack.arbitrationFrom(ack) - //TODO manage lenght ? + inv.arbitrationFrom(bus.inv) inv.address := bus.inv.address -// inv.opcode := bus.inv.opcode - ??? + inv.enable := bus.inv.all bus.ack.arbitrationFrom(ack) + + sync.arbitrationFrom(bus.sync) + +// bus.ack.arbitrationFrom(ack) +// //TODO manage lenght ? +// inv.address := bus.inv.address +//// inv.opcode := bus.inv.opcode +// ??? 
+// +// bus.ack.arbitrationFrom(ack) } + bus } @@ -440,9 +462,10 @@ class DataCache(val p : DataCacheConfig) extends Component{ //Reads val tagsReadRsp = tags.readSync(tagsReadCmd.payload, tagsReadCmd.valid && !io.cpu.memory.isStuck) - val tagsInvReadRsp = withInvalidate generate tags.readSync(tagsInvReadCmd.payload, tagsReadCmd.valid && !io.cpu.memory.isStuck) val dataReadRsp = data.readSync(dataReadCmd.payload, dataReadCmd.valid && !io.cpu.memory.isStuck) + val tagsInvReadRsp = withInvalidate generate tags.readSync(tagsInvReadCmd.payload, tagsInvReadCmd.valid) + //Writes when(tagsWriteCmd.valid && tagsWriteCmd.way(i)){ tags.write(tagsWriteCmd.address, tagsWriteCmd.data) @@ -494,9 +517,22 @@ class DataCache(val p : DataCacheConfig) extends Component{ val full = RegNext(counter.msb) val last = counter === 1 + if(!withInvalidate) { + io.cpu.execute.haltIt setWhen(full) + } + + rspSync clearWhen (!last || !memCmdSent) + rspLast clearWhen (!last) + } + + val sync = withInvalidate generate new Area{ + io.mem.sync.ready := True + + val counter = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0) + counter := counter + U(io.mem.cmd.fire && io.mem.cmd.wr) - U(io.mem.sync.fire) + + val full = RegNext(counter.msb) io.cpu.execute.haltIt setWhen(full) - rspSync clearWhen(!last || !memCmdSent) - rspLast clearWhen(!last) } @@ -509,9 +545,12 @@ class DataCache(val p : DataCacheConfig) extends Component{ val dataColisions = collisionProcess(io.cpu.execute.address(lineRange.high downto wordRange.low), mask) val wayInvalidate = B(0, wayCount bits) //Used if invalidate enabled - if(withWriteResponse) when(io.cpu.execute.fence){ - when(pending.counter =/= 0 || io.cpu.memory.isValid || io.cpu.writeBack.isValid){ - io.cpu.execute.haltIt := True + when(io.cpu.execute.fence){ + val counter = if(withInvalidate) sync.counter else if(withWriteResponse) pending.counter else null + if(counter != null){ + when(counter =/= 0 || io.cpu.memory.isValid || io.cpu.writeBack.isValid){ + io.cpu.execute.haltIt := True + } } } } @@ -563,16 +602,19 @@ class DataCache(val p : DataCacheConfig) extends Component{ //Evict the cache after reset logics val flusher = new Area { val valid = RegInit(False) + val hold = False when(valid) { tagsWriteCmd.valid := valid tagsWriteCmd.address := mmuRsp.physicalAddress(lineRange) tagsWriteCmd.way.setAll() tagsWriteCmd.data.valid := False io.cpu.writeBack.haltIt := True - when(mmuRsp.physicalAddress(lineRange) =/= wayLineCount - 1) { - mmuRsp.physicalAddress.getDrivingReg(lineRange) := mmuRsp.physicalAddress(lineRange) + 1 - } otherwise { - valid := False + when(!hold) { + when(mmuRsp.physicalAddress(lineRange) =/= wayLineCount - 1) { + mmuRsp.physicalAddress.getDrivingReg(lineRange) := mmuRsp.physicalAddress(lineRange) + 1 + } otherwise { + valid := False + } } } @@ -867,6 +909,7 @@ class DataCache(val p : DataCacheConfig) extends Component{ //Invalidate cache tag when(wayHit) { tagsWriteCmd.valid := True + stageB.flusher.hold := True tagsWriteCmd.address := input.address(lineRange) tagsWriteCmd.data.valid := False tagsWriteCmd.way := wayHits diff --git a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala index c27cae7..abd6d52 100644 --- a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala +++ b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala @@ -184,7 +184,7 @@ class DBusCachedPlugin(val config : DataCacheConfig, case true if !withExternalAmo => dBus.rsp.m2sPipe() case true if withExternalAmo => { val rsp = Flow 
(DataCacheMemRsp(cache.p)) - rsp.valid := RegNext(dBus.rsp.valid) + rsp.valid := RegNext(dBus.rsp.valid) init(False) rsp.exclusive := RegNext(dBus.rsp.exclusive) rsp.error := RegNext(dBus.rsp.error) rsp.last := RegNext(dBus.rsp.last) @@ -194,8 +194,9 @@ class DBusCachedPlugin(val config : DataCacheConfig, }) if(withInvalidate) { - cache.io.mem.inv << dBus.inv - cache.io.mem.ack >> dBus.ack + cache.io.mem.inv << dBus.inv + cache.io.mem.ack >> dBus.ack + cache.io.mem.sync << dBus.sync } pipeline plug new Area{ From b9ceabf128492bbddae09fc1ccf1492ffd48fa9b Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Thu, 16 Apr 2020 01:29:13 +0200 Subject: [PATCH 20/91] few fixes --- .../scala/vexriscv/ip/InstructionCache.scala | 2 +- src/main/scala/vexriscv/plugin/DebugPlugin.scala | 2 +- src/test/cpp/raw/common/asm.mk | 2 +- src/test/cpp/regression/main.cpp | 16 ++++++++++++++++ 4 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/main/scala/vexriscv/ip/InstructionCache.scala b/src/main/scala/vexriscv/ip/InstructionCache.scala index 4df0f79..09b1a8a 100644 --- a/src/main/scala/vexriscv/ip/InstructionCache.scala +++ b/src/main/scala/vexriscv/ip/InstructionCache.scala @@ -251,7 +251,7 @@ case class InstructionCacheMemBus(p : InstructionCacheConfig) extends Bundle wit def toBmb() : Bmb = { val busParameter = p.getBmbParameter - val bus = Bmb(busParameter) + val bus = Bmb(busParameter).setCompositeName(this,"toBmb", true) bus.cmd.arbitrationFrom(cmd) bus.cmd.opcode := Bmb.Cmd.Opcode.READ bus.cmd.address := cmd.address.resized diff --git a/src/main/scala/vexriscv/plugin/DebugPlugin.scala b/src/main/scala/vexriscv/plugin/DebugPlugin.scala index c04d167..34a878a 100644 --- a/src/main/scala/vexriscv/plugin/DebugPlugin.scala +++ b/src/main/scala/vexriscv/plugin/DebugPlugin.scala @@ -96,7 +96,7 @@ case class DebugExtensionIo() extends Bundle with IMasterSlave{ -class DebugPlugin(val debugClockDomain : ClockDomain, hardwareBreakpointCount : Int = 0) extends Plugin[VexRiscv] { +class DebugPlugin(var debugClockDomain : ClockDomain, hardwareBreakpointCount : Int = 0) extends Plugin[VexRiscv] { var io : DebugExtensionIo = null val injectionAsks = ArrayBuffer[(Stage, Bool)]() diff --git a/src/test/cpp/raw/common/asm.mk b/src/test/cpp/raw/common/asm.mk index 3d4b205..b63c80a 100644 --- a/src/test/cpp/raw/common/asm.mk +++ b/src/test/cpp/raw/common/asm.mk @@ -40,7 +40,7 @@ OBJS := $(addprefix $(OBJDIR)/,$(OBJS)) -all: $(OBJDIR)/$(PROJ_NAME).elf $(OBJDIR)/$(PROJ_NAME).hex $(OBJDIR)/$(PROJ_NAME).asm +all: $(OBJDIR)/$(PROJ_NAME).elf $(OBJDIR)/$(PROJ_NAME).hex $(OBJDIR)/$(PROJ_NAME).asm $(OBJDIR)/$(PROJ_NAME).bin @echo "done" $(OBJDIR)/%.elf: $(OBJS) | $(OBJDIR) diff --git a/src/test/cpp/regression/main.cpp b/src/test/cpp/regression/main.cpp index 6070183..67d7a0a 100644 --- a/src/test/cpp/regression/main.cpp +++ b/src/test/cpp/regression/main.cpp @@ -2350,6 +2350,7 @@ public: bool reservationValid = false; uint32_t reservationAddress; + uint32_t pendingSync = 0; Workspace *ws; VVexRiscv* top; @@ -2363,11 +2364,17 @@ public: virtual void onReset(){ top->dBus_cmd_ready = 1; top->dBus_rsp_valid = 0; + top->dBus_inv_valid = 0; + top->dBus_ack_ready = 0; + top->dBus_sync_valid = 0; } virtual void preCycle(){ if (top->dBus_cmd_valid && top->dBus_cmd_ready) { if(top->dBus_cmd_payload_wr){ + #ifdef DBUS_INVALIDATE + pendingSync += 1; + #endif #ifndef DBUS_EXCLUSIVE bool error; ws->dBusAccess(top->dBus_cmd_payload_address,1,2,top->dBus_cmd_payload_mask,&top->dBus_cmd_payload_data,&error); @@ -2406,6 +2413,11 @@ public: 
#endif } } + #ifdef DBUS_INVALIDATE + if(top->dBus_sync_valid && top->dBus_sync_ready){ + pendingSync -= 1; + } + #endif } virtual void postCycle(){ @@ -2446,6 +2458,10 @@ public: } } top->dBus_ack_ready = (ws->dStall ? VL_RANDOM_I(7) < 100 : 1); + if(top->dBus_sync_ready) top->dBus_sync_valid = 0; + if(top->dBus_sync_valid == 0 && pendingSync != 0 && (ws->dStall ? VL_RANDOM_I(7) < 80 : 1) ){ + top->dBus_sync_valid = 1; + } #endif } From 73c21177e511aae1dc7dae800e2ad7fb8c1f8297 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Thu, 16 Apr 2020 01:30:03 +0200 Subject: [PATCH 21/91] Add VexRiscvSmpCluster, seem to work on simple case --- .../demo/smp/VexRiscvSmpCluster.scala | 302 ++++++++++++++++++ src/test/cpp/raw/smp/.gitignore | 5 + src/test/cpp/raw/smp/build/smp.asm | 108 +++++++ src/test/cpp/raw/smp/makefile | 5 + src/test/cpp/raw/smp/src/crt.S | 70 ++++ src/test/cpp/raw/smp/src/ld | 16 + 6 files changed, 506 insertions(+) create mode 100644 src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala create mode 100644 src/test/cpp/raw/smp/.gitignore create mode 100644 src/test/cpp/raw/smp/build/smp.asm create mode 100644 src/test/cpp/raw/smp/makefile create mode 100644 src/test/cpp/raw/smp/src/crt.S create mode 100644 src/test/cpp/raw/smp/src/ld diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala new file mode 100644 index 0000000..474357f --- /dev/null +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -0,0 +1,302 @@ +package vexriscv.demo.smp + +import spinal.core._ +import spinal.lib._ +import spinal.lib.bus.bmb.sim.BmbMemoryAgent +import spinal.lib.bus.bmb.{Bmb, BmbArbiter, BmbDecoder, BmbExclusiveMonitor, BmbInvalidateMonitor, BmbParameter} +import spinal.lib.com.jtag.Jtag +import spinal.lib.com.jtag.sim.JtagTcp +import vexriscv.ip.{DataCacheAck, DataCacheConfig, DataCacheMemBus, InstructionCacheConfig} +import vexriscv.plugin.{BranchPlugin, CsrPlugin, CsrPluginConfig, DBusCachedPlugin, DBusSimplePlugin, DebugPlugin, DecoderSimplePlugin, FullBarrelShifterPlugin, HazardSimplePlugin, IBusCachedPlugin, IBusSimplePlugin, IntAluPlugin, MmuPlugin, MmuPortConfig, MulDivIterativePlugin, MulPlugin, RegFilePlugin, STATIC, SrcPlugin, YamlPlugin} +import vexriscv.{VexRiscv, VexRiscvConfig, plugin} + + +case class VexRiscvSmpClusterParameter( cpuConfigs : Seq[VexRiscvConfig]) + +case class VexRiscvSmpCluster(p : VexRiscvSmpClusterParameter, + debugClockDomain : ClockDomain) extends Component{ + val dBusParameter = p.cpuConfigs.head.plugins.find(_.isInstanceOf[DBusCachedPlugin]).get.asInstanceOf[DBusCachedPlugin].config.getBmbParameter() + val dBusArbiterParameter = dBusParameter.copy(sourceWidth = log2Up(p.cpuConfigs.size)) + val exclusiveMonitorParameter = dBusArbiterParameter + val invalidateMonitorParameter = BmbExclusiveMonitor.outputParameter(exclusiveMonitorParameter) + val dMemParameter = BmbInvalidateMonitor.outputParameter(invalidateMonitorParameter) + + val iBusParameter = p.cpuConfigs.head.plugins.find(_.isInstanceOf[IBusCachedPlugin]).get.asInstanceOf[IBusCachedPlugin].config.getBmbParameter() + val iBusArbiterParameter = iBusParameter.copy(sourceWidth = log2Up(p.cpuConfigs.size)) + val iMemParameter = iBusArbiterParameter + + val io = new Bundle { + val dMem = master(Bmb(dMemParameter)) + val iMem = master(Bmb(iMemParameter)) + val timerInterrupts = in Bits(p.cpuConfigs.size bits) + val externalInterrupts = in Bits(p.cpuConfigs.size bits) + val externalSupervisorInterrupts = in 
Bits(p.cpuConfigs.size bits) + val jtag = slave(Jtag()) + val debugReset = out Bool() + } + + val cpus = for((cpuConfig, cpuId) <- p.cpuConfigs.zipWithIndex) yield new Area{ + var iBus : Bmb = null + var dBus : Bmb = null + cpuConfig.plugins.foreach { + case plugin: DebugPlugin => debugClockDomain{ + plugin.debugClockDomain = debugClockDomain + } + case _ => + } + val core = new VexRiscv(cpuConfig) + core.plugins.foreach { + case plugin: IBusCachedPlugin => iBus = plugin.iBus.toBmb() + case plugin: DBusCachedPlugin => dBus = plugin.dBus.toBmb() + case plugin: CsrPlugin => { + plugin.externalInterrupt := io.externalInterrupts(cpuId) + plugin.timerInterrupt := io.timerInterrupts(cpuId) + if (plugin.config.supervisorGen) plugin.externalInterruptS := io.externalSupervisorInterrupts(cpuId) + } + case plugin: DebugPlugin => debugClockDomain{ + io.debugReset := RegNext(plugin.io.resetOut) + io.jtag <> plugin.io.bus.fromJtag() + } + case _ => + } + } + + val dBusArbiter = BmbArbiter( + p = dBusArbiterParameter, + portCount = cpus.size, + pendingRspMax = 64, + lowerFirstPriority = false, + inputsWithInv = cpus.map(_ => true), + inputsWithSync = cpus.map(_ => true), + pendingInvMax = 16 + ) + + (dBusArbiter.io.inputs, cpus).zipped.foreach(_ << _.dBus) + + val exclusiveMonitor = BmbExclusiveMonitor( + inputParameter = exclusiveMonitorParameter, + pendingWriteMax = 64 + ) + exclusiveMonitor.io.input << dBusArbiter.io.output + + val invalidateMonitor = BmbInvalidateMonitor( + inputParameter = invalidateMonitorParameter, + pendingInvMax = 16 + ) + invalidateMonitor.io.input << exclusiveMonitor.io.output + + io.dMem << invalidateMonitor.io.output + + val iBusArbiter = BmbArbiter( + p = iBusArbiterParameter, + portCount = cpus.size, + pendingRspMax = 64, + lowerFirstPriority = false, + inputsWithInv = cpus.map(_ => true), + inputsWithSync = cpus.map(_ => true), + pendingInvMax = 16 + ) + + (iBusArbiter.io.inputs, cpus).zipped.foreach(_ << _.iBus) + io.iMem << iBusArbiter.io.output +} + + + +object VexRiscvSmpClusterGen { + def vexRiscvConfig(id : Int) = { + val config = VexRiscvConfig( + plugins = List( + new MmuPlugin( + ioRange = x => x(31 downto 28) === 0xF + ), + //Uncomment the whole IBusSimplePlugin and comment IBusCachedPlugin if you want uncached iBus config + // new IBusSimplePlugin( + // resetVector = 0x80000000l, + // cmdForkOnSecondStage = false, + // cmdForkPersistence = false, + // prediction = DYNAMIC_TARGET, + // historyRamSizeLog2 = 10, + // catchAccessFault = true, + // compressedGen = true, + // busLatencyMin = 1, + // injectorStage = true, + // memoryTranslatorPortConfig = withMmu generate MmuPortConfig( + // portTlbSize = 4 + // ) + // ), + + //Uncomment the whole IBusCachedPlugin and comment IBusSimplePlugin if you want cached iBus config + new IBusCachedPlugin( + resetVector = 0x80000000l, + compressedGen = false, + prediction = STATIC, + injectorStage = false, + config = InstructionCacheConfig( + cacheSize = 4096*1, + bytePerLine = 32, + wayCount = 1, + addressWidth = 32, + cpuDataWidth = 32, + memDataWidth = 32, + catchIllegalAccess = true, + catchAccessFault = true, + asyncTagMemory = false, + twoCycleRam = false, + twoCycleCache = true + // ) + ), + memoryTranslatorPortConfig = MmuPortConfig( + portTlbSize = 4 + ) + ), + // ).newTightlyCoupledPort(TightlyCoupledPortParameter("iBusTc", a => a(30 downto 28) === 0x0 && a(5))), + // new DBusSimplePlugin( + // catchAddressMisaligned = true, + // catchAccessFault = true, + // earlyInjection = false, + // withLrSc = true, + // 
memoryTranslatorPortConfig = withMmu generate MmuPortConfig( + // portTlbSize = 4 + // ) + // ), + new DBusCachedPlugin( + dBusCmdMasterPipe = true, + dBusCmdSlavePipe = true, + dBusRspSlavePipe = true, + config = new DataCacheConfig( + cacheSize = 4096*1, + bytePerLine = 32, + wayCount = 1, + addressWidth = 32, + cpuDataWidth = 32, + memDataWidth = 32, + catchAccessError = true, + catchIllegal = true, + catchUnaligned = true, + withLrSc = true, + withAmo = true, + withExclusive = true, + withInvalidate = true + // ) + ), + memoryTranslatorPortConfig = MmuPortConfig( + portTlbSize = 4 + ) + ), + + // new MemoryTranslatorPlugin( + // tlbSize = 32, + // virtualRange = _(31 downto 28) === 0xC, + // ioRange = _(31 downto 28) === 0xF + // ), + + new DecoderSimplePlugin( + catchIllegalInstruction = true + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + zeroBoot = true + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false + ), + new FullBarrelShifterPlugin(earlyInjection = false), + // new LightShifterPlugin, + new HazardSimplePlugin( + bypassExecute = true, + bypassMemory = true, + bypassWriteBack = true, + bypassWriteBackBuffer = true, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + // new HazardSimplePlugin(false, true, false, true), + // new HazardSimplePlugin(false, false, false, false), + new MulPlugin, + new MulDivIterativePlugin( + genMul = false, + genDiv = true, + mulUnrollFactor = 32, + divUnrollFactor = 1 + ), + // new DivPlugin, + new CsrPlugin(CsrPluginConfig.all2(0x80000020l).copy(ebreakGen = false, mhartid = id)), + // new CsrPlugin(//CsrPluginConfig.all2(0x80000020l).copy(ebreakGen = true)/* + // CsrPluginConfig( + // catchIllegalAccess = false, + // mvendorid = null, + // marchid = null, + // mimpid = null, + // mhartid = null, + // misaExtensionsInit = 0, + // misaAccess = CsrAccess.READ_ONLY, + // mtvecAccess = CsrAccess.WRITE_ONLY, + // mtvecInit = 0x80000020l, + // mepcAccess = CsrAccess.READ_WRITE, + // mscratchGen = true, + // mcauseAccess = CsrAccess.READ_ONLY, + // mbadaddrAccess = CsrAccess.READ_ONLY, + // mcycleAccess = CsrAccess.NONE, + // minstretAccess = CsrAccess.NONE, + // ecallGen = true, + // ebreakGen = true, + // wfiGenAsWait = false, + // wfiGenAsNop = true, + // ucycleAccess = CsrAccess.NONE + // )), + new BranchPlugin( + earlyBranch = false, + catchAddressMisaligned = true, + fenceiGenAsAJump = false + ), + new YamlPlugin(s"cpu$id.yaml") + ) + ) + if(id == 0) config.plugins += new DebugPlugin(null) + config + } + def vexRiscvCluster() = VexRiscvSmpCluster( + debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")), + p = VexRiscvSmpClusterParameter( + cpuConfigs = List.tabulate(4) { + vexRiscvConfig(_) + } + ) + ) + def main(args: Array[String]): Unit = { + SpinalVerilog { + vexRiscvCluster() + } + } +} + + +object VexRiscvSmpClusterTest extends App{ + import spinal.core.sim._ + + val simConfig = SimConfig + simConfig.withWave + simConfig.allOptimisation + simConfig.addSimulatorFlag("--threads 1") + + simConfig.compile(VexRiscvSmpClusterGen.vexRiscvCluster()).doSim(seed = 42){dut => + dut.clockDomain.forkSimSpeedPrinter(1.0) + dut.clockDomain.forkStimulus(10) + dut.debugClockDomain.forkStimulus(10) + + + JtagTcp(dut.io.jtag, 100) + + val ram = new BmbMemoryAgent(0x100000000l) + ram.addPort(dut.io.iMem,0,dut.clockDomain,true) + ram.addPort(dut.io.dMem,0,dut.clockDomain,true) + + ram.memory.loadBin(0x80000000l, "src/test/cpp/raw/smp/build/smp.bin") + 
+ sleep(10000*10) + simSuccess() + } +} \ No newline at end of file diff --git a/src/test/cpp/raw/smp/.gitignore b/src/test/cpp/raw/smp/.gitignore new file mode 100644 index 0000000..16512ff --- /dev/null +++ b/src/test/cpp/raw/smp/.gitignore @@ -0,0 +1,5 @@ +*.map +*.v +*.elf +*.o +*.hex \ No newline at end of file diff --git a/src/test/cpp/raw/smp/build/smp.asm b/src/test/cpp/raw/smp/build/smp.asm new file mode 100644 index 0000000..8173f8c --- /dev/null +++ b/src/test/cpp/raw/smp/build/smp.asm @@ -0,0 +1,108 @@ + +build/smp.elf: file format elf32-littleriscv + + +Disassembly of section .crt_section: + +80000000 <_start>: +80000000: f1402a73 csrr s4,mhartid +80000004: 00000517 auipc a0,0x0 +80000008: 07850513 addi a0,a0,120 # 8000007c +8000000c: 00000513 li a0,0 +80000010: 00a52023 sw a0,0(a0) + +80000014 : +80000014: 00100513 li a0,1 +80000018: 00000597 auipc a1,0x0 +8000001c: 05c58593 addi a1,a1,92 # 80000074 +80000020: 00a5a02f amoadd.w zero,a0,(a1) + +80000024 : +80000024: 00000417 auipc s0,0x0 +80000028: 05042403 lw s0,80(s0) # 80000074 +8000002c: 0c800513 li a0,200 +80000030: 038000ef jal ra,80000068 +80000034: 00000497 auipc s1,0x0 +80000038: 0404a483 lw s1,64(s1) # 80000074 +8000003c: fe8494e3 bne s1,s0,80000024 +80000040: 00000513 li a0,0 +80000044: 00952023 sw s1,0(a0) +80000048: 0040006f j 8000004c + +8000004c : +8000004c: 00800513 li a0,8 +80000050: 00052023 sw zero,0(a0) +80000054: 0100006f j 80000064 + +80000058 : +80000058: 00c00513 li a0,12 +8000005c: 00052023 sw zero,0(a0) +80000060: 0040006f j 80000064 + +80000064 : +80000064: 0000006f j 80000064 + +80000068 : +80000068: fff50513 addi a0,a0,-1 +8000006c: fe051ee3 bnez a0,80000068 +80000070: 00008067 ret + +80000074 : +80000074: 0000 unimp + ... + +80000078 : +80000078: 0000 unimp + ... + +8000007c : +8000007c: 0000000b 0xb + +80000080 : +80000080: 0016 c.slli zero,0x5 + ... + +80000084 : +80000084: 0049 c.nop 18 + ... + +80000088 : +80000088: 003a c.slli zero,0xe + ... + +8000008c : +8000008c: 0038 addi a4,sp,8 + ... + +80000090 : +80000090: 0000004b fnmsub.s ft0,ft0,ft0,ft0,rne + +80000094 : +80000094: 0038 addi a4,sp,8 + ... + +80000098 : +80000098: 00000053 fadd.s ft0,ft0,ft0,rne + +8000009c : +8000009c: 0021 c.nop 8 + ... 
+ +800000a0 : +800000a0: ffffffbf 0xffffffbf + +800000a4 : +800000a4: ffa9 bnez a5,7ffffffe <_start-0x2> +800000a6: ffff 0xffff + +800000a8 : +800000a8: ffc9 bnez a5,80000042 +800000aa: ffff 0xffff + +800000ac : +800000ac: 0004 0x4 +800000ae: ffff 0xffff + +800000b0 : +800000b0: 0005 c.nop 1 +800000b2: ffff 0xffff diff --git a/src/test/cpp/raw/smp/makefile b/src/test/cpp/raw/smp/makefile new file mode 100644 index 0000000..0886c1b --- /dev/null +++ b/src/test/cpp/raw/smp/makefile @@ -0,0 +1,5 @@ +PROJ_NAME=smp + +ATOMIC=yes + +include ../common/asm.mk \ No newline at end of file diff --git a/src/test/cpp/raw/smp/src/crt.S b/src/test/cpp/raw/smp/src/crt.S new file mode 100644 index 0000000..be1e59d --- /dev/null +++ b/src/test/cpp/raw/smp/src/crt.S @@ -0,0 +1,70 @@ +#define REPORT_OFFSET 0xF0000000 +#define REPORT_THREAD_ID 0 +#define REPORT_THREAD_COUNT 1 +#define REPORT_SUCCESS 2 +#define REPORT_FAILURE 3 + +#define report(reg, id) \ + li a0, id*4; \ + sw reg, 0(a0); \ + +_start: + + #define HART_ID x20 + csrr HART_ID, mhartid + la a0, test1_data + report(a0, REPORT_THREAD_ID) + + +count_thread_start: + //Count up threads + li a0, 1 + la a1, thread_count + amoadd.w x0, a0, (a1) + +count_thread_wait: + //Wait everybody + lw s0, thread_count + li a0, 200 + call sleep + lw s1, thread_count + bne s1, s0, count_thread_wait + report(s1, REPORT_THREAD_ID) + + j success + +success: + report(x0, REPORT_SUCCESS) + j end + +failure: + report(x0, REPORT_FAILURE) + j end + +end: + j end + + +sleep: + addi a0, a0, -1 + bnez a0, sleep + ret + + +thread_count: .word 0 +shared_memory_1: .word 0 + +test1_data: .word 11 +test2_data: .word 22 +test3_data: .word 73 +test4_data: .word 58 +test5_data: .word 56 +test6_data: .word 75 +test7_data: .word 56 +test8_data: .word 83 +test9_data: .word 33 +test10_data: .word -65 +test11_data: .word -87 +test12_data: .word -55 +test13_data: .word 0xFFFF0004 +test14_data: .word 0xFFFF0005 \ No newline at end of file diff --git a/src/test/cpp/raw/smp/src/ld b/src/test/cpp/raw/smp/src/ld new file mode 100644 index 0000000..93d8de8 --- /dev/null +++ b/src/test/cpp/raw/smp/src/ld @@ -0,0 +1,16 @@ +OUTPUT_ARCH( "riscv" ) + +MEMORY { + onChipRam (W!RX)/*(RX)*/ : ORIGIN = 0x80000000, LENGTH = 128K +} + +SECTIONS +{ + + .crt_section : + { + . 
= ALIGN(4); + *crt.o(.text) + } > onChipRam + +} From fd52f9ba5090951de44d5d3c6972542bfa6edb27 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Thu, 16 Apr 2020 02:22:18 +0200 Subject: [PATCH 22/91] Add smp.bin --- src/test/cpp/raw/smp/.gitignore | 3 ++- src/test/cpp/raw/smp/build/smp.bin | Bin 0 -> 180 bytes 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100755 src/test/cpp/raw/smp/build/smp.bin diff --git a/src/test/cpp/raw/smp/.gitignore b/src/test/cpp/raw/smp/.gitignore index 16512ff..a7caa3b 100644 --- a/src/test/cpp/raw/smp/.gitignore +++ b/src/test/cpp/raw/smp/.gitignore @@ -2,4 +2,5 @@ *.v *.elf *.o -*.hex \ No newline at end of file +*.hex +!*.bin \ No newline at end of file diff --git a/src/test/cpp/raw/smp/build/smp.bin b/src/test/cpp/raw/smp/build/smp.bin new file mode 100755 index 0000000000000000000000000000000000000000..05ebfb983b57501c52c46d03b1f0fe27ab61ea39 GIT binary patch literal 180 zcmXTca`-6D%D^DZ+R82rq?Hwx0@(r#(}C>Ctw&k)7c6BEXJKGqR$*ZkW^Lek&(Od; z9msE9!ou=+O3Ocpo~aD^3=Tkj4M6>@KsE!TFzW#*+X2J}%68Vd m42V5}*b0a(fY=*K2LowEAm0D~|NoUhd=iLR82 Date: Thu, 16 Apr 2020 15:23:25 +0200 Subject: [PATCH 23/91] More SMP tests (barrier via AMO and LRSC) --- .../demo/smp/VexRiscvSmpCluster.scala | 88 +++++-- src/test/cpp/raw/smp/build/smp.asm | 241 +++++++++++------- src/test/cpp/raw/smp/build/smp.bin | Bin 180 -> 496 bytes src/test/cpp/raw/smp/src/crt.S | 120 ++++++--- 4 files changed, 312 insertions(+), 137 deletions(-) diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala index 474357f..519a010 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -10,6 +10,8 @@ import vexriscv.ip.{DataCacheAck, DataCacheConfig, DataCacheMemBus, InstructionC import vexriscv.plugin.{BranchPlugin, CsrPlugin, CsrPluginConfig, DBusCachedPlugin, DBusSimplePlugin, DebugPlugin, DecoderSimplePlugin, FullBarrelShifterPlugin, HazardSimplePlugin, IBusCachedPlugin, IBusSimplePlugin, IntAluPlugin, MmuPlugin, MmuPortConfig, MulDivIterativePlugin, MulPlugin, RegFilePlugin, STATIC, SrcPlugin, YamlPlugin} import vexriscv.{VexRiscv, VexRiscvConfig, plugin} +import scala.collection.mutable + case class VexRiscvSmpClusterParameter( cpuConfigs : Seq[VexRiscvConfig]) @@ -27,7 +29,8 @@ case class VexRiscvSmpCluster(p : VexRiscvSmpClusterParameter, val io = new Bundle { val dMem = master(Bmb(dMemParameter)) - val iMem = master(Bmb(iMemParameter)) +// val iMem = master(Bmb(iMemParameter)) + val iMems = Vec(master(Bmb(iMemParameter)), p.cpuConfigs.size) val timerInterrupts = in Bits(p.cpuConfigs.size bits) val externalInterrupts = in Bits(p.cpuConfigs.size bits) val externalSupervisorInterrupts = in Bits(p.cpuConfigs.size bits) @@ -87,18 +90,19 @@ case class VexRiscvSmpCluster(p : VexRiscvSmpClusterParameter, io.dMem << invalidateMonitor.io.output - val iBusArbiter = BmbArbiter( - p = iBusArbiterParameter, - portCount = cpus.size, - pendingRspMax = 64, - lowerFirstPriority = false, - inputsWithInv = cpus.map(_ => true), - inputsWithSync = cpus.map(_ => true), - pendingInvMax = 16 - ) - - (iBusArbiter.io.inputs, cpus).zipped.foreach(_ << _.iBus) - io.iMem << iBusArbiter.io.output +// val iBusArbiter = BmbArbiter( +// p = iBusArbiterParameter, +// portCount = cpus.size, +// pendingRspMax = 64, +// lowerFirstPriority = false, +// inputsWithInv = cpus.map(_ => true), +// inputsWithSync = cpus.map(_ => true), +// pendingInvMax = 16 +// ) +// +// 
(iBusArbiter.io.inputs, cpus).zipped.foreach(_ << _.iBus) +// io.iMem << iBusArbiter.io.output + (io.iMems, cpus).zipped.foreach(_ << _.iBus) } @@ -274,6 +278,14 @@ object VexRiscvSmpClusterGen { } +object SmpTest{ + val REPORT_OFFSET = 0xF8000000 + val REPORT_THREAD_ID = 0x00 + val REPORT_THREAD_COUNT = 0x04 + val REPORT_END = 0x08 + val REPORT_BARRIER_START = 0x0C + val REPORT_BARRIER_END = 0x10 +} object VexRiscvSmpClusterTest extends App{ import spinal.core.sim._ @@ -282,21 +294,57 @@ object VexRiscvSmpClusterTest extends App{ simConfig.allOptimisation simConfig.addSimulatorFlag("--threads 1") - simConfig.compile(VexRiscvSmpClusterGen.vexRiscvCluster()).doSim(seed = 42){dut => + simConfig.compile(VexRiscvSmpClusterGen.vexRiscvCluster()).doSimUntilVoid(seed = 42){dut => + SimTimeout(10000*10) dut.clockDomain.forkSimSpeedPrinter(1.0) dut.clockDomain.forkStimulus(10) dut.debugClockDomain.forkStimulus(10) + val hartCount = dut.cpus.size JtagTcp(dut.io.jtag, 100) - val ram = new BmbMemoryAgent(0x100000000l) - ram.addPort(dut.io.iMem,0,dut.clockDomain,true) - ram.addPort(dut.io.dMem,0,dut.clockDomain,true) + val withStall = false + val cpuEnd = Array.fill(dut.p.cpuConfigs.size)(false) + val barriers = mutable.HashMap[Int, Int]() + val ram = new BmbMemoryAgent(0x100000000l){ + var writeData = 0 + override def setByte(address: Long, value: Byte): Unit = { + if((address & 0xF0000000l) != 0xF0000000l) return super.setByte(address, value) + val byteId = address & 3 + val mask = 0xFF << (byteId*8) + writeData = (writeData & ~mask) | ((value.toInt << (byteId*8)) & mask) + if(byteId != 3) return + val offset = (address & ~0xF0000000l)-3 + println(s"W[0x${offset.toHexString}] = $writeData") + offset match { + case _ if offset >= 0x8000000 && offset < 0x9000000 => { + val hart = ((offset & 0xFF0000) >> 16).toInt + val code = (offset & 0x00FFFF).toInt + val data = writeData + import SmpTest._ + code match { + case REPORT_THREAD_ID => assert(data == hart) + case REPORT_THREAD_COUNT => assert(data == hartCount) + case REPORT_END => assert(data == 0); assert(cpuEnd(hart) == false); cpuEnd(hart) = true; if(!cpuEnd.exists(_ == false)) simSuccess() + case REPORT_BARRIER_START => { + val counter = barriers.getOrElse(data, 0) + assert(counter < hartCount) + barriers(data) = counter + 1 + } + case REPORT_BARRIER_END => { + val counter = barriers.getOrElse(data, 0) + assert(counter == hartCount) + } + } + } + } + } + } + dut.io.iMems.foreach(ram.addPort(_,0,dut.clockDomain,true, withStall)) //Moarr powaaaaa +// ram.addPort(dut.io.iMem,0,dut.clockDomain,true, withStall) + ram.addPort(dut.io.dMem,0,dut.clockDomain,true, withStall) ram.memory.loadBin(0x80000000l, "src/test/cpp/raw/smp/build/smp.bin") - - sleep(10000*10) - simSuccess() } } \ No newline at end of file diff --git a/src/test/cpp/raw/smp/build/smp.asm b/src/test/cpp/raw/smp/build/smp.asm index 8173f8c..7820bb4 100644 --- a/src/test/cpp/raw/smp/build/smp.asm +++ b/src/test/cpp/raw/smp/build/smp.asm @@ -5,104 +5,171 @@ build/smp.elf: file format elf32-littleriscv Disassembly of section .crt_section: 80000000 <_start>: -80000000: f1402a73 csrr s4,mhartid -80000004: 00000517 auipc a0,0x0 -80000008: 07850513 addi a0,a0,120 # 8000007c -8000000c: 00000513 li a0,0 -80000010: 00a52023 sw a0,0(a0) +80000000: f1402473 csrr s0,mhartid +80000004: f80002b7 lui t0,0xf8000 +80000008: f1402373 csrr t1,mhartid +8000000c: 01031313 slli t1,t1,0x10 +80000010: 006282b3 add t0,t0,t1 +80000014: 0082a023 sw s0,0(t0) # f8000000 -80000014 : -80000014: 00100513 li a0,1 -80000018: 
00000597 auipc a1,0x0 -8000001c: 05c58593 addi a1,a1,92 # 80000074 -80000020: 00a5a02f amoadd.w zero,a0,(a1) +80000018 : +80000018: 00100513 li a0,1 +8000001c: 00000597 auipc a1,0x0 +80000020: 1c458593 addi a1,a1,452 # 800001e0 +80000024: 00a5a02f amoadd.w zero,a0,(a1) -80000024 : -80000024: 00000417 auipc s0,0x0 -80000028: 05042403 lw s0,80(s0) # 80000074 -8000002c: 0c800513 li a0,200 -80000030: 038000ef jal ra,80000068 -80000034: 00000497 auipc s1,0x0 -80000038: 0404a483 lw s1,64(s1) # 80000074 -8000003c: fe8494e3 bne s1,s0,80000024 -80000040: 00000513 li a0,0 -80000044: 00952023 sw s1,0(a0) -80000048: 0040006f j 8000004c +80000028 : +80000028: 00000417 auipc s0,0x0 +8000002c: 1b842403 lw s0,440(s0) # 800001e0 +80000030: 0c800513 li a0,200 +80000034: 1a0000ef jal ra,800001d4 +80000038: 00000497 auipc s1,0x0 +8000003c: 1a84a483 lw s1,424(s1) # 800001e0 +80000040: fe8494e3 bne s1,s0,80000028 +80000044: f80002b7 lui t0,0xf8000 +80000048: 00428293 addi t0,t0,4 # f8000004 +8000004c: f1402373 csrr t1,mhartid +80000050: 01031313 slli t1,t1,0x10 +80000054: 006282b3 add t0,t0,t1 +80000058: 0092a023 sw s1,0(t0) -8000004c : -8000004c: 00800513 li a0,8 -80000050: 00052023 sw zero,0(a0) -80000054: 0100006f j 80000064 +8000005c : +8000005c: 00100513 li a0,1 +80000060: 040000ef jal ra,800000a0 +80000064: 00200513 li a0,2 +80000068: 038000ef jal ra,800000a0 +8000006c: 00300513 li a0,3 +80000070: 030000ef jal ra,800000a0 +80000074: 00400513 li a0,4 +80000078: 09c000ef jal ra,80000114 +8000007c: 00500513 li a0,5 +80000080: 094000ef jal ra,80000114 +80000084: 00600513 li a0,6 +80000088: 08c000ef jal ra,80000114 +8000008c: 00700513 li a0,7 +80000090: 010000ef jal ra,800000a0 +80000094: 00800513 li a0,8 +80000098: 07c000ef jal ra,80000114 +8000009c: 0f40006f j 80000190 -80000058 : -80000058: 00c00513 li a0,12 -8000005c: 00052023 sw zero,0(a0) -80000060: 0040006f j 80000064 +800000a0 : +800000a0: f80002b7 lui t0,0xf8000 +800000a4: 00c28293 addi t0,t0,12 # f800000c +800000a8: f1402373 csrr t1,mhartid +800000ac: 01031313 slli t1,t1,0x10 +800000b0: 006282b3 add t0,t0,t1 +800000b4: 00a2a023 sw a0,0(t0) +800000b8: 00000297 auipc t0,0x0 +800000bc: 13028293 addi t0,t0,304 # 800001e8 +800000c0: 00100313 li t1,1 +800000c4: 0062a02f amoadd.w zero,t1,(t0) +800000c8: 00000317 auipc t1,0x0 +800000cc: 11832303 lw t1,280(t1) # 800001e0 -80000064 : -80000064: 0000006f j 80000064 +800000d0 : +800000d0: 0002a383 lw t2,0(t0) +800000d4: fe639ee3 bne t2,t1,800000d0 +800000d8: f80002b7 lui t0,0xf8000 +800000dc: 01028293 addi t0,t0,16 # f8000010 +800000e0: f1402373 csrr t1,mhartid +800000e4: 01031313 slli t1,t1,0x10 +800000e8: 006282b3 add t0,t0,t1 +800000ec: 00a2a023 sw a0,0(t0) -80000068 : -80000068: fff50513 addi a0,a0,-1 -8000006c: fe051ee3 bnez a0,80000068 -80000070: 00008067 ret +800000f0 : +800000f0: f14022f3 csrr t0,mhartid +800000f4: 00029863 bnez t0,80000104 +800000f8: 00000297 auipc t0,0x0 +800000fc: 0e02a823 sw zero,240(t0) # 800001e8 +80000100: 00008067 ret -80000074 : -80000074: 0000 unimp +80000104 : +80000104: 00000297 auipc t0,0x0 +80000108: 0e42a283 lw t0,228(t0) # 800001e8 +8000010c: fe029ce3 bnez t0,80000104 +80000110: 00008067 ret + +80000114 : +80000114: f80002b7 lui t0,0xf8000 +80000118: 00c28293 addi t0,t0,12 # f800000c +8000011c: f1402373 csrr t1,mhartid +80000120: 01031313 slli t1,t1,0x10 +80000124: 006282b3 add t0,t0,t1 +80000128: 00a2a023 sw a0,0(t0) +8000012c: 00000297 auipc t0,0x0 +80000130: 0c028293 addi t0,t0,192 # 800001ec + +80000134 : +80000134: 1002a32f lr.w t1,(t0) +80000138: 00130313 addi 
t1,t1,1 +8000013c: 1862a32f sc.w t1,t1,(t0) +80000140: fe031ae3 bnez t1,80000134 +80000144: 00000317 auipc t1,0x0 +80000148: 09c32303 lw t1,156(t1) # 800001e0 + +8000014c : +8000014c: 0002a383 lw t2,0(t0) +80000150: fe639ee3 bne t2,t1,8000014c +80000154: f80002b7 lui t0,0xf8000 +80000158: 01028293 addi t0,t0,16 # f8000010 +8000015c: f1402373 csrr t1,mhartid +80000160: 01031313 slli t1,t1,0x10 +80000164: 006282b3 add t0,t0,t1 +80000168: 00a2a023 sw a0,0(t0) + +8000016c : +8000016c: f14022f3 csrr t0,mhartid +80000170: 00029863 bnez t0,80000180 +80000174: 00000297 auipc t0,0x0 +80000178: 0602ac23 sw zero,120(t0) # 800001ec +8000017c: 00008067 ret + +80000180 : +80000180: 00000297 auipc t0,0x0 +80000184: 06c2a283 lw t0,108(t0) # 800001ec +80000188: fe029ce3 bnez t0,80000180 +8000018c: 00008067 ret + +80000190 : +80000190: 00000413 li s0,0 +80000194: f80002b7 lui t0,0xf8000 +80000198: 00828293 addi t0,t0,8 # f8000008 +8000019c: f1402373 csrr t1,mhartid +800001a0: 01031313 slli t1,t1,0x10 +800001a4: 006282b3 add t0,t0,t1 +800001a8: 0082a023 sw s0,0(t0) +800001ac: 0240006f j 800001d0 + +800001b0 : +800001b0: 00100413 li s0,1 +800001b4: f80002b7 lui t0,0xf8000 +800001b8: 00828293 addi t0,t0,8 # f8000008 +800001bc: f1402373 csrr t1,mhartid +800001c0: 01031313 slli t1,t1,0x10 +800001c4: 006282b3 add t0,t0,t1 +800001c8: 0082a023 sw s0,0(t0) +800001cc: 0040006f j 800001d0 + +800001d0 : +800001d0: 0000006f j 800001d0 + +800001d4 : +800001d4: fff50513 addi a0,a0,-1 +800001d8: fe051ee3 bnez a0,800001d4 +800001dc: 00008067 ret + +800001e0 : +800001e0: 0000 unimp ... -80000078 : -80000078: 0000 unimp +800001e4 : +800001e4: 0000 unimp ... -8000007c : -8000007c: 0000000b 0xb - -80000080 : -80000080: 0016 c.slli zero,0x5 +800001e8 : +800001e8: 0000 unimp ... -80000084 : -80000084: 0049 c.nop 18 +800001ec : +800001ec: 0000 unimp ... - -80000088 : -80000088: 003a c.slli zero,0xe - ... - -8000008c : -8000008c: 0038 addi a4,sp,8 - ... - -80000090 : -80000090: 0000004b fnmsub.s ft0,ft0,ft0,ft0,rne - -80000094 : -80000094: 0038 addi a4,sp,8 - ... - -80000098 : -80000098: 00000053 fadd.s ft0,ft0,ft0,rne - -8000009c : -8000009c: 0021 c.nop 8 - ... 
- -800000a0 : -800000a0: ffffffbf 0xffffffbf - -800000a4 : -800000a4: ffa9 bnez a5,7ffffffe <_start-0x2> -800000a6: ffff 0xffff - -800000a8 : -800000a8: ffc9 bnez a5,80000042 -800000aa: ffff 0xffff - -800000ac : -800000ac: 0004 0x4 -800000ae: ffff 0xffff - -800000b0 : -800000b0: 0005 c.nop 1 -800000b2: ffff 0xffff diff --git a/src/test/cpp/raw/smp/build/smp.bin b/src/test/cpp/raw/smp/build/smp.bin index 05ebfb983b57501c52c46d03b1f0fe27ab61ea39..85f5095b37a3b0e5edf4e19cc9b07e93bfd0ea2b 100755 GIT binary patch literal 496 zcmb7Au}Z{15PjLro*o=$WQvn0BIf$|3Cn@KpCGxyQsI6eY~l}iLM%i=qF`ZXVJqjS zm^qD3U7_w|V=RnfKnzY^=*Cf%loGOCo7KngCvEK*~X)fcg31j-TFPWCL;> zZ<7iKOTbN-N4K$CT*g&BKQ|L}H7*(r?zKtnVTn+ZI$#8&W_<3GI%3p}$2DIkbyn9c z_Ne*VDfC9Gy;#xCkO=~pIUbu(pn-Ph&xN;LD{y%?T^ zX#P<)85Y-~s`($D13woINrImTTZJpD*88RZFYgD@`|)1%zP*X>+WMQ7oelUNf^6rv SI`@F@{`OJqs%96ao%ac}0*hk+ literal 180 zcmXTca`-6D%D^DZ+R82rq?Hwx0@(r#(}C>Ctw&k)7c6BEXJKGqR$*ZkW^Lek&(Od; z9msE9!ou=+O3Ocpo~aD^3=Tkj4M6>@KsE!TFzW#*+X2J}%68Vd m42V5}*b0a(fY=*K2LowEAm0D~|NoUhd=iLR82 Date: Thu, 16 Apr 2020 17:27:27 +0200 Subject: [PATCH 24/91] fix smp test barrier --- .../demo/smp/VexRiscvSmpCluster.scala | 55 +++-- src/test/cpp/raw/smp/build/smp.asm | 219 +++++++++--------- src/test/cpp/raw/smp/build/smp.bin | Bin 496 -> 504 bytes src/test/cpp/raw/smp/src/crt.S | 66 +++--- 4 files changed, 177 insertions(+), 163 deletions(-) diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala index 519a010..fff9b03 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -11,6 +11,7 @@ import vexriscv.plugin.{BranchPlugin, CsrPlugin, CsrPluginConfig, DBusCachedPlug import vexriscv.{VexRiscv, VexRiscvConfig, plugin} import scala.collection.mutable +import scala.collection.mutable.ArrayBuffer case class VexRiscvSmpClusterParameter( cpuConfigs : Seq[VexRiscvConfig]) @@ -262,17 +263,17 @@ object VexRiscvSmpClusterGen { if(id == 0) config.plugins += new DebugPlugin(null) config } - def vexRiscvCluster() = VexRiscvSmpCluster( + def vexRiscvCluster(cpuCount : Int) = VexRiscvSmpCluster( debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")), p = VexRiscvSmpClusterParameter( - cpuConfigs = List.tabulate(4) { + cpuConfigs = List.tabulate(cpuCount) { vexRiscvConfig(_) } ) ) def main(args: Array[String]): Unit = { SpinalVerilog { - vexRiscvCluster() + vexRiscvCluster(4) } } } @@ -294,19 +295,41 @@ object VexRiscvSmpClusterTest extends App{ simConfig.allOptimisation simConfig.addSimulatorFlag("--threads 1") - simConfig.compile(VexRiscvSmpClusterGen.vexRiscvCluster()).doSimUntilVoid(seed = 42){dut => - SimTimeout(10000*10) + val cpuCount = 4 + simConfig.compile(VexRiscvSmpClusterGen.vexRiscvCluster(cpuCount)).doSimUntilVoid(seed = 42){dut => + SimTimeout(10000*10*cpuCount) dut.clockDomain.forkSimSpeedPrinter(1.0) dut.clockDomain.forkStimulus(10) dut.debugClockDomain.forkStimulus(10) - val hartCount = dut.cpus.size JtagTcp(dut.io.jtag, 100) val withStall = false val cpuEnd = Array.fill(dut.p.cpuConfigs.size)(false) val barriers = mutable.HashMap[Int, Int]() + + var reportWatchdog = 0 + periodicaly(10000*10){ + assert(reportWatchdog != 0) + reportWatchdog = 0 + } + + case class Report(hart : Int, code : Int, data : Int){ + override def toString: String = { + f"CPU:$hart%2d h${code}%3x -> $data%3d" + } + } + val reports = ArrayBuffer.fill(cpuCount)(ArrayBuffer[Report]()) + onSimEnd{ + for((list, hart) <- 
reports.zipWithIndex){ + println(f"\n\n**** CPU $hart%2d ****") + for((report, reportId) <- list.zipWithIndex){ + println(f" $reportId%3d : h${report.code}%3x -> ${report.data}%3d") + } + } + } + val ram = new BmbMemoryAgent(0x100000000l){ var writeData = 0 override def setByte(address: Long, value: Byte): Unit = { @@ -316,25 +339,31 @@ object VexRiscvSmpClusterTest extends App{ writeData = (writeData & ~mask) | ((value.toInt << (byteId*8)) & mask) if(byteId != 3) return val offset = (address & ~0xF0000000l)-3 - println(s"W[0x${offset.toHexString}] = $writeData") +// println(s"W[0x${offset.toHexString}] = $writeData @${simTime()}") offset match { case _ if offset >= 0x8000000 && offset < 0x9000000 => { - val hart = ((offset & 0xFF0000) >> 16).toInt - val code = (offset & 0x00FFFF).toInt - val data = writeData + val report = Report( + hart = ((offset & 0xFF0000) >> 16).toInt, + code = (offset & 0x00FFFF).toInt, + data = writeData + ) + println(report) + reports(report.hart) += report + reportWatchdog += 1 + import report._ import SmpTest._ code match { case REPORT_THREAD_ID => assert(data == hart) - case REPORT_THREAD_COUNT => assert(data == hartCount) + case REPORT_THREAD_COUNT => assert(data == cpuCount) case REPORT_END => assert(data == 0); assert(cpuEnd(hart) == false); cpuEnd(hart) = true; if(!cpuEnd.exists(_ == false)) simSuccess() case REPORT_BARRIER_START => { val counter = barriers.getOrElse(data, 0) - assert(counter < hartCount) + assert(counter < cpuCount) barriers(data) = counter + 1 } case REPORT_BARRIER_END => { val counter = barriers.getOrElse(data, 0) - assert(counter == hartCount) + assert(counter == cpuCount) } } } diff --git a/src/test/cpp/raw/smp/build/smp.asm b/src/test/cpp/raw/smp/build/smp.asm index 7820bb4..db25d17 100644 --- a/src/test/cpp/raw/smp/build/smp.asm +++ b/src/test/cpp/raw/smp/build/smp.asm @@ -10,24 +10,24 @@ Disassembly of section .crt_section: 80000008: f1402373 csrr t1,mhartid 8000000c: 01031313 slli t1,t1,0x10 80000010: 006282b3 add t0,t0,t1 -80000014: 0082a023 sw s0,0(t0) # f8000000 +80000014: 0082a023 sw s0,0(t0) # f8000000 80000018 : 80000018: 00100513 li a0,1 8000001c: 00000597 auipc a1,0x0 -80000020: 1c458593 addi a1,a1,452 # 800001e0 +80000020: 1d058593 addi a1,a1,464 # 800001ec 80000024: 00a5a02f amoadd.w zero,a0,(a1) 80000028 : 80000028: 00000417 auipc s0,0x0 -8000002c: 1b842403 lw s0,440(s0) # 800001e0 +8000002c: 1c442403 lw s0,452(s0) # 800001ec 80000030: 0c800513 li a0,200 -80000034: 1a0000ef jal ra,800001d4 +80000034: 1ac000ef jal ra,800001e0 80000038: 00000497 auipc s1,0x0 -8000003c: 1a84a483 lw s1,424(s1) # 800001e0 +8000003c: 1b44a483 lw s1,436(s1) # 800001ec 80000040: fe8494e3 bne s1,s0,80000028 80000044: f80002b7 lui t0,0xf8000 -80000048: 00428293 addi t0,t0,4 # f8000004 +80000048: 00428293 addi t0,t0,4 # f8000004 8000004c: f1402373 csrr t1,mhartid 80000050: 01031313 slli t1,t1,0x10 80000054: 006282b3 add t0,t0,t1 @@ -41,135 +41,126 @@ Disassembly of section .crt_section: 8000006c: 00300513 li a0,3 80000070: 030000ef jal ra,800000a0 80000074: 00400513 li a0,4 -80000078: 09c000ef jal ra,80000114 +80000078: 0a4000ef jal ra,8000011c 8000007c: 00500513 li a0,5 -80000080: 094000ef jal ra,80000114 +80000080: 09c000ef jal ra,8000011c 80000084: 00600513 li a0,6 -80000088: 08c000ef jal ra,80000114 +80000088: 094000ef jal ra,8000011c 8000008c: 00700513 li a0,7 80000090: 010000ef jal ra,800000a0 80000094: 00800513 li a0,8 -80000098: 07c000ef jal ra,80000114 -8000009c: 0f40006f j 80000190 +80000098: 084000ef jal ra,8000011c +8000009c: 1000006f j 
8000019c 800000a0 : 800000a0: f80002b7 lui t0,0xf8000 -800000a4: 00c28293 addi t0,t0,12 # f800000c +800000a4: 00c28293 addi t0,t0,12 # f800000c 800000a8: f1402373 csrr t1,mhartid 800000ac: 01031313 slli t1,t1,0x10 800000b0: 006282b3 add t0,t0,t1 800000b4: 00a2a023 sw a0,0(t0) -800000b8: 00000297 auipc t0,0x0 -800000bc: 13028293 addi t0,t0,304 # 800001e8 -800000c0: 00100313 li t1,1 -800000c4: 0062a02f amoadd.w zero,t1,(t0) -800000c8: 00000317 auipc t1,0x0 -800000cc: 11832303 lw t1,280(t1) # 800001e0 +800000b8: 00000e97 auipc t4,0x0 +800000bc: 13ceae83 lw t4,316(t4) # 800001f4 +800000c0: 00000297 auipc t0,0x0 +800000c4: 13028293 addi t0,t0,304 # 800001f0 +800000c8: 00100313 li t1,1 +800000cc: 0062a2af amoadd.w t0,t1,(t0) +800000d0: 00128293 addi t0,t0,1 +800000d4: 00000317 auipc t1,0x0 +800000d8: 11832303 lw t1,280(t1) # 800001ec +800000dc: 00629c63 bne t0,t1,800000f4 +800000e0: 001e8293 addi t0,t4,1 +800000e4: 00000317 auipc t1,0x0 +800000e8: 10032623 sw zero,268(t1) # 800001f0 +800000ec: 00000317 auipc t1,0x0 +800000f0: 10532423 sw t0,264(t1) # 800001f4 -800000d0 : -800000d0: 0002a383 lw t2,0(t0) -800000d4: fe639ee3 bne t2,t1,800000d0 -800000d8: f80002b7 lui t0,0xf8000 -800000dc: 01028293 addi t0,t0,16 # f8000010 -800000e0: f1402373 csrr t1,mhartid -800000e4: 01031313 slli t1,t1,0x10 -800000e8: 006282b3 add t0,t0,t1 -800000ec: 00a2a023 sw a0,0(t0) +800000f4 : +800000f4: 00000297 auipc t0,0x0 +800000f8: 1002a283 lw t0,256(t0) # 800001f4 +800000fc: ffd28ce3 beq t0,t4,800000f4 +80000100: f80002b7 lui t0,0xf8000 +80000104: 01028293 addi t0,t0,16 # f8000010 +80000108: f1402373 csrr t1,mhartid +8000010c: 01031313 slli t1,t1,0x10 +80000110: 006282b3 add t0,t0,t1 +80000114: 00a2a023 sw a0,0(t0) +80000118: 00008067 ret -800000f0 : -800000f0: f14022f3 csrr t0,mhartid -800000f4: 00029863 bnez t0,80000104 -800000f8: 00000297 auipc t0,0x0 -800000fc: 0e02a823 sw zero,240(t0) # 800001e8 -80000100: 00008067 ret +8000011c : +8000011c: f80002b7 lui t0,0xf8000 +80000120: 00c28293 addi t0,t0,12 # f800000c +80000124: f1402373 csrr t1,mhartid +80000128: 01031313 slli t1,t1,0x10 +8000012c: 006282b3 add t0,t0,t1 +80000130: 00a2a023 sw a0,0(t0) +80000134: 00000e97 auipc t4,0x0 +80000138: 0c0eae83 lw t4,192(t4) # 800001f4 +8000013c: 00000297 auipc t0,0x0 +80000140: 0b428293 addi t0,t0,180 # 800001f0 -80000104 : -80000104: 00000297 auipc t0,0x0 -80000108: 0e42a283 lw t0,228(t0) # 800001e8 -8000010c: fe029ce3 bnez t0,80000104 -80000110: 00008067 ret +80000144 : +80000144: 1002a32f lr.w t1,(t0) +80000148: 00130313 addi t1,t1,1 +8000014c: 1862a3af sc.w t2,t1,(t0) +80000150: fe039ae3 bnez t2,80000144 +80000154: 00000297 auipc t0,0x0 +80000158: 0982a283 lw t0,152(t0) # 800001ec +8000015c: 00629c63 bne t0,t1,80000174 +80000160: 001e8293 addi t0,t4,1 +80000164: 00000317 auipc t1,0x0 +80000168: 08032623 sw zero,140(t1) # 800001f0 +8000016c: 00000317 auipc t1,0x0 +80000170: 08532423 sw t0,136(t1) # 800001f4 -80000114 : -80000114: f80002b7 lui t0,0xf8000 -80000118: 00c28293 addi t0,t0,12 # f800000c -8000011c: f1402373 csrr t1,mhartid -80000120: 01031313 slli t1,t1,0x10 -80000124: 006282b3 add t0,t0,t1 -80000128: 00a2a023 sw a0,0(t0) -8000012c: 00000297 auipc t0,0x0 -80000130: 0c028293 addi t0,t0,192 # 800001ec - -80000134 : -80000134: 1002a32f lr.w t1,(t0) -80000138: 00130313 addi t1,t1,1 -8000013c: 1862a32f sc.w t1,t1,(t0) -80000140: fe031ae3 bnez t1,80000134 -80000144: 00000317 auipc t1,0x0 -80000148: 09c32303 lw t1,156(t1) # 800001e0 - -8000014c : -8000014c: 0002a383 lw t2,0(t0) -80000150: fe639ee3 bne t2,t1,8000014c 
-80000154: f80002b7 lui t0,0xf8000 -80000158: 01028293 addi t0,t0,16 # f8000010 -8000015c: f1402373 csrr t1,mhartid -80000160: 01031313 slli t1,t1,0x10 -80000164: 006282b3 add t0,t0,t1 -80000168: 00a2a023 sw a0,0(t0) - -8000016c : -8000016c: f14022f3 csrr t0,mhartid -80000170: 00029863 bnez t0,80000180 +80000174 : 80000174: 00000297 auipc t0,0x0 -80000178: 0602ac23 sw zero,120(t0) # 800001ec -8000017c: 00008067 ret +80000178: 0802a283 lw t0,128(t0) # 800001f4 +8000017c: ffd28ce3 beq t0,t4,80000174 +80000180: f80002b7 lui t0,0xf8000 +80000184: 01028293 addi t0,t0,16 # f8000010 +80000188: f1402373 csrr t1,mhartid +8000018c: 01031313 slli t1,t1,0x10 +80000190: 006282b3 add t0,t0,t1 +80000194: 00a2a023 sw a0,0(t0) +80000198: 00008067 ret -80000180 : -80000180: 00000297 auipc t0,0x0 -80000184: 06c2a283 lw t0,108(t0) # 800001ec -80000188: fe029ce3 bnez t0,80000180 -8000018c: 00008067 ret +8000019c : +8000019c: 00000413 li s0,0 +800001a0: f80002b7 lui t0,0xf8000 +800001a4: 00828293 addi t0,t0,8 # f8000008 +800001a8: f1402373 csrr t1,mhartid +800001ac: 01031313 slli t1,t1,0x10 +800001b0: 006282b3 add t0,t0,t1 +800001b4: 0082a023 sw s0,0(t0) +800001b8: 0240006f j 800001dc -80000190 : -80000190: 00000413 li s0,0 -80000194: f80002b7 lui t0,0xf8000 -80000198: 00828293 addi t0,t0,8 # f8000008 -8000019c: f1402373 csrr t1,mhartid -800001a0: 01031313 slli t1,t1,0x10 -800001a4: 006282b3 add t0,t0,t1 -800001a8: 0082a023 sw s0,0(t0) -800001ac: 0240006f j 800001d0 +800001bc : +800001bc: 00100413 li s0,1 +800001c0: f80002b7 lui t0,0xf8000 +800001c4: 00828293 addi t0,t0,8 # f8000008 +800001c8: f1402373 csrr t1,mhartid +800001cc: 01031313 slli t1,t1,0x10 +800001d0: 006282b3 add t0,t0,t1 +800001d4: 0082a023 sw s0,0(t0) +800001d8: 0040006f j 800001dc -800001b0 : -800001b0: 00100413 li s0,1 -800001b4: f80002b7 lui t0,0xf8000 -800001b8: 00828293 addi t0,t0,8 # f8000008 -800001bc: f1402373 csrr t1,mhartid -800001c0: 01031313 slli t1,t1,0x10 -800001c4: 006282b3 add t0,t0,t1 -800001c8: 0082a023 sw s0,0(t0) -800001cc: 0040006f j 800001d0 +800001dc : +800001dc: 0000006f j 800001dc -800001d0 : -800001d0: 0000006f j 800001d0 +800001e0 : +800001e0: fff50513 addi a0,a0,-1 +800001e4: fe051ee3 bnez a0,800001e0 +800001e8: 00008067 ret -800001d4 : -800001d4: fff50513 addi a0,a0,-1 -800001d8: fe051ee3 bnez a0,800001d4 -800001dc: 00008067 ret - -800001e0 : -800001e0: 0000 unimp - ... - -800001e4 : -800001e4: 0000 unimp - ... - -800001e8 : -800001e8: 0000 unimp - ... - -800001ec : +800001ec : 800001ec: 0000 unimp ... + +800001f0 : +800001f0: 0000 unimp + ... + +800001f4 : +800001f4: 0000 unimp + ... 
diff --git a/src/test/cpp/raw/smp/build/smp.bin b/src/test/cpp/raw/smp/build/smp.bin index 85f5095b37a3b0e5edf4e19cc9b07e93bfd0ea2b..9eaea4019f4f0b20fe25f3cedf802e0ed45ec5d7 100755 GIT binary patch literal 504 zcmah`J5Iwu6ddn*B?qEt;T1uAA|zY1;Rsfg10Zn$WD3e42~k82w4|V* zjdB1AuHXY$DUhff-o_NfMc8I`_U)UQw>u2N#k;`w&<__x(s?rx{ILNkjZ^`nTXobO zzo6{`GUy+Yikk&y`ziC}xqpy9+?2L|WO`WFxL;`$fJ^Ea1!76+lo5;}W4J}?jB(Z= zbzwQ}RrUUuSOIoBRI#{V4;1p&JV0h2q z0HhOu^Z^cG)&d~Szz9?g;%p@Sp rEX<(4I7#BM6!Slbk%u{vj8s4|at#w3!pK8xAR{NwU`zmdhk*eA_L4z& diff --git a/src/test/cpp/raw/smp/src/crt.S b/src/test/cpp/raw/smp/src/crt.S index 27b0717..eb197d6 100644 --- a/src/test/cpp/raw/smp/src/crt.S +++ b/src/test/cpp/raw/smp/src/crt.S @@ -58,49 +58,44 @@ barrier_amo_test: - +#define ENTRY_PHASE t4 barrier_amo: report(a0, REPORT_BARRIER_START) - la t0, barrier_amo_value + lw ENTRY_PHASE, barrier_phase + la t0, barrier_value li t1, 1 - amoadd.w x0, t1, (t0) + amoadd.w t0, t1, (t0) + addi t0, t0, 1 lw t1, thread_count + bne t0, t1, barrier_amo_wait + addi t0,ENTRY_PHASE,1 + sw x0, barrier_value, t1 + sw t0, barrier_phase, t1 barrier_amo_wait: - lw t2, (t0) - bne t2, t1, barrier_amo_wait + lw t0, barrier_phase + beq t0, ENTRY_PHASE, barrier_amo_wait report(a0, REPORT_BARRIER_END) -barrier_amo_reset: - csrr t0, mhartid - bnez t0, barrier_amo_reset_wait - sw x0, barrier_amo_value, t0 ret -barrier_amo_reset_wait: - lw t0, barrier_amo_value - bnez t0, barrier_amo_reset_wait - ret - + barrier_lrsc: - report(a0, REPORT_BARRIER_START) - la t0, barrier_lrsc_value + report(a0, REPORT_BARRIER_START) + lw ENTRY_PHASE, barrier_phase + la t0, barrier_value barrier_lrsc_try: - lr.w t1, (t0) - addi t1, t1, 1 - sc.w t1, t1, (t0) - bnez t1, barrier_lrsc_try - lw t1, thread_count + lr.w t1, (t0) + addi t1, t1, 1 + sc.w t2, t1, (t0) + bnez t2, barrier_lrsc_try + lw t0, thread_count + bne t0, t1, barrier_lrsc_wait + addi t0,ENTRY_PHASE,1 + sw x0, barrier_value, t1 + sw t0, barrier_phase, t1 barrier_lrsc_wait: - lw t2, (t0) - bne t2, t1, barrier_lrsc_wait - report(a0, REPORT_BARRIER_END) -barrier_lrsc_reset: - csrr t0, mhartid - bnez t0, barrier_lrsc_reset_wait - sw x0, barrier_lrsc_value, t0 - ret -barrier_lrsc_reset_wait: - lw t0, barrier_lrsc_value - bnez t0, barrier_lrsc_reset_wait - ret + lw t0, barrier_phase + beq t0, ENTRY_PHASE, barrier_lrsc_wait + report(a0, REPORT_BARRIER_END) + ret @@ -125,6 +120,5 @@ sleep: thread_count: .word 0 -shared_memory_1: .word 0 -barrier_amo_value: .word 0 -barrier_lrsc_value: .word 0 +barrier_value: .word 0 +barrier_phase: .word 0 From 8c0e534c6b20efdd01645d493380545b9d7497eb Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Sat, 18 Apr 2020 00:51:47 +0200 Subject: [PATCH 25/91] Add openSBI test, seem to work fine --- src/main/scala/vexriscv/Riscv.scala | 2 + .../demo/smp/VexRiscvSmpCluster.scala | 189 +++++++++++++----- src/test/cpp/raw/smp/build/smp.asm | 2 +- src/test/cpp/raw/smp/build/smp.bin | Bin 504 -> 504 bytes src/test/cpp/raw/smp/src/crt.S | 2 +- 5 files changed, 144 insertions(+), 51 deletions(-) diff --git a/src/main/scala/vexriscv/Riscv.scala b/src/main/scala/vexriscv/Riscv.scala index 91cf876..90a40c5 100644 --- a/src/main/scala/vexriscv/Riscv.scala +++ b/src/main/scala/vexriscv/Riscv.scala @@ -4,6 +4,8 @@ import spinal.core._ object Riscv{ + def misaToInt(values : String) = values.toLowerCase.map(e => 1 << (e-'a')).reduce(_ | _) + def funct7Range = 31 downto 25 def rdRange = 11 downto 7 def funct3Range = 14 downto 12 diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala 
b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala index fff9b03..f92321c 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -1,14 +1,17 @@ package vexriscv.demo.smp +import spinal.core import spinal.core._ +import spinal.core.sim.{onSimEnd, simSuccess} import spinal.lib._ import spinal.lib.bus.bmb.sim.BmbMemoryAgent import spinal.lib.bus.bmb.{Bmb, BmbArbiter, BmbDecoder, BmbExclusiveMonitor, BmbInvalidateMonitor, BmbParameter} import spinal.lib.com.jtag.Jtag import spinal.lib.com.jtag.sim.JtagTcp +import vexriscv.demo.smp.VexRiscvSmpClusterTest.{cpuCount, withStall} import vexriscv.ip.{DataCacheAck, DataCacheConfig, DataCacheMemBus, InstructionCacheConfig} import vexriscv.plugin.{BranchPlugin, CsrPlugin, CsrPluginConfig, DBusCachedPlugin, DBusSimplePlugin, DebugPlugin, DecoderSimplePlugin, FullBarrelShifterPlugin, HazardSimplePlugin, IBusCachedPlugin, IBusSimplePlugin, IntAluPlugin, MmuPlugin, MmuPortConfig, MulDivIterativePlugin, MulPlugin, RegFilePlugin, STATIC, SrcPlugin, YamlPlugin} -import vexriscv.{VexRiscv, VexRiscvConfig, plugin} +import vexriscv.{Riscv, VexRiscv, VexRiscvConfig, plugin} import scala.collection.mutable import scala.collection.mutable.ArrayBuffer @@ -34,6 +37,7 @@ case class VexRiscvSmpCluster(p : VexRiscvSmpClusterParameter, val iMems = Vec(master(Bmb(iMemParameter)), p.cpuConfigs.size) val timerInterrupts = in Bits(p.cpuConfigs.size bits) val externalInterrupts = in Bits(p.cpuConfigs.size bits) + val softwareInterrupts = in Bits(p.cpuConfigs.size bits) val externalSupervisorInterrupts = in Bits(p.cpuConfigs.size bits) val jtag = slave(Jtag()) val debugReset = out Bool() @@ -53,6 +57,7 @@ case class VexRiscvSmpCluster(p : VexRiscvSmpClusterParameter, case plugin: IBusCachedPlugin => iBus = plugin.iBus.toBmb() case plugin: DBusCachedPlugin => dBus = plugin.dBus.toBmb() case plugin: CsrPlugin => { + plugin.softwareInterrupt := io.softwareInterrupts(cpuId) plugin.externalInterrupt := io.externalInterrupts(cpuId) plugin.timerInterrupt := io.timerInterrupts(cpuId) if (plugin.config.supervisorGen) plugin.externalInterruptS := io.externalSupervisorInterrupts(cpuId) @@ -228,7 +233,7 @@ object VexRiscvSmpClusterGen { divUnrollFactor = 1 ), // new DivPlugin, - new CsrPlugin(CsrPluginConfig.all2(0x80000020l).copy(ebreakGen = false, mhartid = id)), + new CsrPlugin(CsrPluginConfig.all2(0x80000020l).copy(ebreakGen = false, mhartid = id, misaExtensionsInit = Riscv.misaToInt("imas"))), // new CsrPlugin(//CsrPluginConfig.all2(0x80000020l).copy(ebreakGen = true)/* // CsrPluginConfig( // catchIllegalAccess = false, @@ -279,59 +284,51 @@ object VexRiscvSmpClusterGen { } -object SmpTest{ + +object VexRiscvSmpClusterTestInfrastructure{ val REPORT_OFFSET = 0xF8000000 val REPORT_THREAD_ID = 0x00 val REPORT_THREAD_COUNT = 0x04 val REPORT_END = 0x08 val REPORT_BARRIER_START = 0x0C val REPORT_BARRIER_END = 0x10 -} -object VexRiscvSmpClusterTest extends App{ - import spinal.core.sim._ - val simConfig = SimConfig - simConfig.withWave - simConfig.allOptimisation - simConfig.addSimulatorFlag("--threads 1") + val PUTC = 0x00 + val GETC = 0x04 + val CLINT_ADDR = 0x10000 + val CLINT_IPI_ADDR = CLINT_ADDR+0x0000 + val CLINT_CMP_ADDR = CLINT_ADDR+0x4000 + val CLINT_TIME_ADDR = CLINT_ADDR+0xBFF8 - val cpuCount = 4 - simConfig.compile(VexRiscvSmpClusterGen.vexRiscvCluster(cpuCount)).doSimUntilVoid(seed = 42){dut => - SimTimeout(10000*10*cpuCount) - dut.clockDomain.forkSimSpeedPrinter(1.0) - 
dut.clockDomain.forkStimulus(10) - dut.debugClockDomain.forkStimulus(10) - - - JtagTcp(dut.io.jtag, 100) - - val withStall = false - val cpuEnd = Array.fill(dut.p.cpuConfigs.size)(false) - val barriers = mutable.HashMap[Int, Int]() - - var reportWatchdog = 0 - periodicaly(10000*10){ - assert(reportWatchdog != 0) - reportWatchdog = 0 - } - - case class Report(hart : Int, code : Int, data : Int){ - override def toString: String = { - f"CPU:$hart%2d h${code}%3x -> $data%3d" - } - } - val reports = ArrayBuffer.fill(cpuCount)(ArrayBuffer[Report]()) - onSimEnd{ - for((list, hart) <- reports.zipWithIndex){ - println(f"\n\n**** CPU $hart%2d ****") - for((report, reportId) <- list.zipWithIndex){ - println(f" $reportId%3d : h${report.code}%3x -> ${report.data}%3d") + def ram(dut : VexRiscvSmpCluster) = { + import spinal.core.sim._ + val cpuCount = dut.cpus.size + val ram = new BmbMemoryAgent(0x100000000l){ + case class Report(hart : Int, code : Int, data : Int){ + override def toString: String = { + f"CPU:$hart%2d ${code}%3x -> $data%3d" + } + } + val reports = ArrayBuffer.fill(cpuCount)(ArrayBuffer[Report]()) + onSimEnd{ + for((list, hart) <- reports.zipWithIndex){ + println(f"\n\n**** CPU $hart%2d ****") + for((report, reportId) <- list.zipWithIndex){ + println(f" $reportId%3d : ${report.code}%3x -> ${report.data}%3d") + } } } - } - val ram = new BmbMemoryAgent(0x100000000l){ + val writeTable = mutable.HashMap[Int, Int => Unit]() + val readTable = mutable.HashMap[Int, () => Int]() + def onWrite(address : Int)(body : Int => Unit) = writeTable(address) = body + def onRead(address : Int)(body : => Int) = readTable(address) = () => body + var writeData = 0 + var readData = 0 + var reportWatchdog = 0 + val cpuEnd = Array.fill(cpuCount)(false) + val barriers = mutable.HashMap[Int, Int]() override def setByte(address: Long, value: Byte): Unit = { if((address & 0xF0000000l) != 0xF0000000l) return super.setByte(address, value) val byteId = address & 3 @@ -339,7 +336,7 @@ object VexRiscvSmpClusterTest extends App{ writeData = (writeData & ~mask) | ((value.toInt << (byteId*8)) & mask) if(byteId != 3) return val offset = (address & ~0xF0000000l)-3 -// println(s"W[0x${offset.toHexString}] = $writeData @${simTime()}") + // println(s"W[0x${offset.toHexString}] = $writeData @${simTime()}") offset match { case _ if offset >= 0x8000000 && offset < 0x9000000 => { val report = Report( @@ -351,7 +348,6 @@ object VexRiscvSmpClusterTest extends App{ reports(report.hart) += report reportWatchdog += 1 import report._ - import SmpTest._ code match { case REPORT_THREAD_ID => assert(data == hart) case REPORT_THREAD_COUNT => assert(data == cpuCount) @@ -367,13 +363,108 @@ object VexRiscvSmpClusterTest extends App{ } } } + case _ => writeTable.get(offset.toInt) match { + case Some(x) => x(writeData) + case _ => simFailure(f"\n\nWrite at ${address-3}%8x with $writeData%8x") + } } } - } - dut.io.iMems.foreach(ram.addPort(_,0,dut.clockDomain,true, withStall)) //Moarr powaaaaa -// ram.addPort(dut.io.iMem,0,dut.clockDomain,true, withStall) - ram.addPort(dut.io.dMem,0,dut.clockDomain,true, withStall) + override def getByte(address: Long): Byte = { + if((address & 0xF0000000l) != 0xF0000000l) return super.getByte(address) + val byteId = address & 3 + val offset = (address & ~0xF0000000l) + if(byteId == 0) readData = readTable.get(offset.toInt) match { + case Some(x) => x() + case _ => simFailure(f"\n\nRead at $address%8x") + } + (readData >> (byteId*8)).toByte + } + + val clint = new { + val cmp = Array.fill(cpuCount)(0l) + } + + 
onWrite(PUTC)(data => print(data.toChar)) +// onWrite(GETC)(data => System.in.read().toInt) + + onRead(CLINT_TIME_ADDR)(simTime().toInt) + onRead(CLINT_TIME_ADDR+4)((simTime() >> 32).toInt) + for(hartId <- 0 until cpuCount){ + onWrite(CLINT_IPI_ADDR + hartId*4) {data => + val mask = 1l << hartId + val value = (dut.io.softwareInterrupts.toLong & ~mask) | (if(data == 1) mask else 0) + dut.io.softwareInterrupts #= value + } + onRead(CLINT_CMP_ADDR + hartId*8)(clint.cmp(hartId).toInt) + onRead(CLINT_CMP_ADDR + hartId*8+4)((clint.cmp(hartId) >> 32).toInt) + onWrite(CLINT_CMP_ADDR + hartId*8)(data => clint.cmp(hartId) = (clint.cmp(hartId) & 0xFFFFFFFF00000000l) | data) + onWrite(CLINT_CMP_ADDR + hartId*8+4)(data => (clint.cmp(hartId) & 0x00000000FFFFFFFFl) | (data << 32)) + } + + var time = 0l + periodicaly(100){ + time += 10 + var timerInterrupts = 0l + for(i <- 0 until cpuCount){ + if(clint.cmp(i) < time) timerInterrupts |= 1l << i + } + dut.io.timerInterrupts #= timerInterrupts + } + + } + dut.io.iMems.foreach(ram.addPort(_,0,dut.clockDomain,true, withStall)) + ram.addPort(dut.io.dMem,0,dut.clockDomain,true, withStall) + ram + } + def init(dut : VexRiscvSmpCluster): Unit ={ + import spinal.core.sim._ + dut.clockDomain.forkSimSpeedPrinter(1.0) + dut.clockDomain.forkStimulus(10) + dut.debugClockDomain.forkStimulus(10) + JtagTcp(dut.io.jtag, 100) + } +} + +object VexRiscvSmpClusterTest extends App{ + import spinal.core.sim._ + + val simConfig = SimConfig +// simConfig.withWave + simConfig.allOptimisation + simConfig.addSimulatorFlag("--threads 1") + + val cpuCount = 4 + val withStall = true + + simConfig.compile(VexRiscvSmpClusterGen.vexRiscvCluster(cpuCount)).doSimUntilVoid(seed = 42){dut => + SimTimeout(10000*10*cpuCount) + VexRiscvSmpClusterTestInfrastructure.init(dut) + val ram = VexRiscvSmpClusterTestInfrastructure.ram(dut) ram.memory.loadBin(0x80000000l, "src/test/cpp/raw/smp/build/smp.bin") + periodicaly(20000*10){ + assert(ram.reportWatchdog != 0) + ram.reportWatchdog = 0 + } + } +} + + +object VexRiscvSmpClusterOpenSbi extends App{ + import spinal.core.sim._ + + val simConfig = SimConfig + simConfig.withWave + simConfig.allOptimisation + simConfig.addSimulatorFlag("--threads 1") + + val cpuCount = 4 + val withStall = false + + simConfig.compile(VexRiscvSmpClusterGen.vexRiscvCluster(cpuCount)).doSimUntilVoid(seed = 42){dut => + VexRiscvSmpClusterTestInfrastructure.init(dut) + val ram = VexRiscvSmpClusterTestInfrastructure.ram(dut) + ram.memory.loadBin(0x80000000l, "../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_payload.bin") +// ram.memory.loadBin(0x80000000l, "../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_jump.bin") } } \ No newline at end of file diff --git a/src/test/cpp/raw/smp/build/smp.asm b/src/test/cpp/raw/smp/build/smp.asm index db25d17..06f2616 100644 --- a/src/test/cpp/raw/smp/build/smp.asm +++ b/src/test/cpp/raw/smp/build/smp.asm @@ -21,7 +21,7 @@ Disassembly of section .crt_section: 80000028 : 80000028: 00000417 auipc s0,0x0 8000002c: 1c442403 lw s0,452(s0) # 800001ec -80000030: 0c800513 li a0,200 +80000030: 19000513 li a0,400 80000034: 1ac000ef jal ra,800001e0 80000038: 00000497 auipc s1,0x0 8000003c: 1b44a483 lw s1,436(s1) # 800001ec diff --git a/src/test/cpp/raw/smp/build/smp.bin b/src/test/cpp/raw/smp/build/smp.bin index 9eaea4019f4f0b20fe25f3cedf802e0ed45ec5d7..59a832fee66759269250ede3b78662b6fdd2d1d3 100755 GIT binary patch delta 13 Ucmeyt{DXOd5fg*tMw26q03)LWegFUf delta 13 Ucmeyt{DXOd5mN)tMw26q048Dt@c;k- diff --git 
a/src/test/cpp/raw/smp/src/crt.S b/src/test/cpp/raw/smp/src/crt.S index eb197d6..72cc5b8 100644 --- a/src/test/cpp/raw/smp/src/crt.S +++ b/src/test/cpp/raw/smp/src/crt.S @@ -26,7 +26,7 @@ count_thread_start: count_thread_wait: //Wait everybody lw s0, thread_count - li a0, 200 + li a0, 400 call sleep lw s1, thread_count bne s1, s0, count_thread_wait From befecc7ed6a02ded6c9c34981737db4dd3c9fd0d Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Sat, 18 Apr 2020 00:51:57 +0200 Subject: [PATCH 26/91] cleaning --- src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala index f92321c..ad1d3c3 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -419,7 +419,7 @@ object VexRiscvSmpClusterTestInfrastructure{ } def init(dut : VexRiscvSmpCluster): Unit ={ import spinal.core.sim._ - dut.clockDomain.forkSimSpeedPrinter(1.0) +// dut.clockDomain.forkSimSpeedPrinter(1.0) dut.clockDomain.forkStimulus(10) dut.debugClockDomain.forkStimulus(10) JtagTcp(dut.io.jtag, 100) From 4a49e6d91fc575c125485496c24f61c1c830aef4 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Sat, 18 Apr 2020 01:26:31 +0200 Subject: [PATCH 27/91] initialize the clint in sim --- src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala index ad1d3c3..e691ea2 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -388,6 +388,10 @@ object VexRiscvSmpClusterTestInfrastructure{ onWrite(PUTC)(data => print(data.toChar)) // onWrite(GETC)(data => System.in.read().toInt) + dut.io.softwareInterrupts #= 0 + dut.io.timerInterrupts #= 0 + dut.io.externalInterrupts #= 0 + dut.io.externalSupervisorInterrupts #= 0 onRead(CLINT_TIME_ADDR)(simTime().toInt) onRead(CLINT_TIME_ADDR+4)((simTime() >> 32).toInt) for(hartId <- 0 until cpuCount){ @@ -458,7 +462,7 @@ object VexRiscvSmpClusterOpenSbi extends App{ simConfig.allOptimisation simConfig.addSimulatorFlag("--threads 1") - val cpuCount = 4 + val cpuCount = 16 val withStall = false simConfig.compile(VexRiscvSmpClusterGen.vexRiscvCluster(cpuCount)).doSimUntilVoid(seed = 42){dut => From af128ec9eb25d3f4ba6011a082064ca1879bb60c Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Sat, 18 Apr 2020 01:27:35 +0200 Subject: [PATCH 28/91] revert to 4 cpu --- src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala index e691ea2..92de7aa 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -462,7 +462,7 @@ object VexRiscvSmpClusterOpenSbi extends App{ simConfig.allOptimisation simConfig.addSimulatorFlag("--threads 1") - val cpuCount = 16 + val cpuCount = 4 val withStall = false simConfig.compile(VexRiscvSmpClusterGen.vexRiscvCluster(cpuCount)).doSimUntilVoid(seed = 42){dut => From a1b6353d6bafb24364e969339ae3b15b12bc180a Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Sun, 19 Apr 2020 19:48:57 +0200 Subject: [PATCH 29/91] workaround 
AMO LR/SC consistancy issue, but that need a proper fix --- src/main/scala/vexriscv/ip/DataCache.scala | 9 ++++++--- src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala | 9 ++++----- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/main/scala/vexriscv/ip/DataCache.scala b/src/main/scala/vexriscv/ip/DataCache.scala index 8005440..d685a34 100644 --- a/src/main/scala/vexriscv/ip/DataCache.scala +++ b/src/main/scala/vexriscv/ip/DataCache.scala @@ -514,6 +514,7 @@ class DataCache(val p : DataCacheConfig) extends Component{ val counter = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0) counter := counter + U(io.mem.cmd.fire && io.mem.cmd.last) - U(io.mem.rsp.valid && io.mem.rsp.last) + val consistent = counter === 0 val full = RegNext(counter.msb) val last = counter === 1 @@ -533,6 +534,8 @@ class DataCache(val p : DataCacheConfig) extends Component{ val full = RegNext(counter.msb) io.cpu.execute.haltIt setWhen(full) + + val consistent = counter === 0 } @@ -546,9 +549,9 @@ class DataCache(val p : DataCacheConfig) extends Component{ val wayInvalidate = B(0, wayCount bits) //Used if invalidate enabled when(io.cpu.execute.fence){ - val counter = if(withInvalidate) sync.counter else if(withWriteResponse) pending.counter else null - if(counter != null){ - when(counter =/= 0 || io.cpu.memory.isValid || io.cpu.writeBack.isValid){ + val consistent = if(withInvalidate) sync.consistent else if(withWriteResponse) pending.consistent else null + if(consistent != null){ + when(!consistent || io.cpu.memory.isValid && io.cpu.memory.isWrite || io.cpu.writeBack.isValid && io.cpu.memory.isWrite){ io.cpu.execute.haltIt := True } } diff --git a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala index abd6d52..769ed07 100644 --- a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala +++ b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala @@ -229,11 +229,10 @@ class DBusCachedPlugin(val config : DataCacheConfig, val ff = input(INSTRUCTION)(31 downto 20).as(FenceFlags()) if(withWriteResponse){ hazard setWhen(input(MEMORY_FENCE) && (ff.PS && ff.SL)) //Manage write to read hit ordering (ensure invalidation timings) -// Not required as LR SC AMO emited on the memory bus enforce the ordering, + it bypass the cache -// when(input(INSTRUCTION)(26 downto 25) =/= 0){ -// if(withLrSc) hazard setWhen(input(MEMORY_LRSC)) -// if(withAmo) hazard setWhen(input(MEMORY_AMO)) -// } + when(input(INSTRUCTION)(26 downto 25) =/= 0){ + if(withLrSc) hazard setWhen(input(MEMORY_LRSC)) + if(withAmo) hazard setWhen(input(MEMORY_AMO)) + } } insert(MEMORY_FENCE_DECODED) := hazard } From 8e8b64feaaf0d3c93aab6c61d33be08260c4c60c Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Sun, 19 Apr 2020 19:49:26 +0200 Subject: [PATCH 30/91] Got full linux / buildroot to boot in 4 cpu config --- .../demo/smp/VexRiscvSmpCluster.scala | 73 ++++++++++++++----- 1 file changed, 55 insertions(+), 18 deletions(-) diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala index 92de7aa..b490654 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -383,17 +383,32 @@ object VexRiscvSmpClusterTestInfrastructure{ val clint = new { val cmp = Array.fill(cpuCount)(0l) + var time = 0l + periodicaly(100){ + time += 10 + var timerInterrupts = 0l + for(i <- 0 until cpuCount){ + if(cmp(i) < time) timerInterrupts |= 1l << i + } + 
dut.io.timerInterrupts #= timerInterrupts + } + +// delayed(200*1000000){ +// dut.io.softwareInterrupts #= 0xE +// enableSimWave() +// println("force IPI") +// } } onWrite(PUTC)(data => print(data.toChar)) -// onWrite(GETC)(data => System.in.read().toInt) + onRead(GETC)( if(System.in.available() != 0) System.in.read() else -1) dut.io.softwareInterrupts #= 0 dut.io.timerInterrupts #= 0 dut.io.externalInterrupts #= 0 dut.io.externalSupervisorInterrupts #= 0 - onRead(CLINT_TIME_ADDR)(simTime().toInt) - onRead(CLINT_TIME_ADDR+4)((simTime() >> 32).toInt) + onRead(CLINT_TIME_ADDR)(clint.time.toInt) + onRead(CLINT_TIME_ADDR+4)((clint.time >> 32).toInt) for(hartId <- 0 until cpuCount){ onWrite(CLINT_IPI_ADDR + hartId*4) {data => val mask = 1l << hartId @@ -402,19 +417,11 @@ object VexRiscvSmpClusterTestInfrastructure{ } onRead(CLINT_CMP_ADDR + hartId*8)(clint.cmp(hartId).toInt) onRead(CLINT_CMP_ADDR + hartId*8+4)((clint.cmp(hartId) >> 32).toInt) - onWrite(CLINT_CMP_ADDR + hartId*8)(data => clint.cmp(hartId) = (clint.cmp(hartId) & 0xFFFFFFFF00000000l) | data) - onWrite(CLINT_CMP_ADDR + hartId*8+4)(data => (clint.cmp(hartId) & 0x00000000FFFFFFFFl) | (data << 32)) + onWrite(CLINT_CMP_ADDR + hartId*8){data => clint.cmp(hartId) = (clint.cmp(hartId) & 0xFFFFFFFF00000000l) | data} + onWrite(CLINT_CMP_ADDR + hartId*8+4){data => clint.cmp(hartId) = (clint.cmp(hartId) & 0x00000000FFFFFFFFl) | (data.toLong << 32)} } - var time = 0l - periodicaly(100){ - time += 10 - var timerInterrupts = 0l - for(i <- 0 until cpuCount){ - if(clint.cmp(i) < time) timerInterrupts |= 1l << i - } - dut.io.timerInterrupts #= timerInterrupts - } + } dut.io.iMems.foreach(ram.addPort(_,0,dut.clockDomain,true, withStall)) @@ -423,7 +430,6 @@ object VexRiscvSmpClusterTestInfrastructure{ } def init(dut : VexRiscvSmpCluster): Unit ={ import spinal.core.sim._ -// dut.clockDomain.forkSimSpeedPrinter(1.0) dut.clockDomain.forkStimulus(10) dut.debugClockDomain.forkStimulus(10) JtagTcp(dut.io.jtag, 100) @@ -465,10 +471,41 @@ object VexRiscvSmpClusterOpenSbi extends App{ val cpuCount = 4 val withStall = false - simConfig.compile(VexRiscvSmpClusterGen.vexRiscvCluster(cpuCount)).doSimUntilVoid(seed = 42){dut => + simConfig.workspaceName("rawr_4c").compile(VexRiscvSmpClusterGen.vexRiscvCluster(cpuCount)).doSimUntilVoid(seed = 42){dut => +// dut.clockDomain.forkSimSpeedPrinter(1.0) VexRiscvSmpClusterTestInfrastructure.init(dut) val ram = VexRiscvSmpClusterTestInfrastructure.ram(dut) - ram.memory.loadBin(0x80000000l, "../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_payload.bin") -// ram.memory.loadBin(0x80000000l, "../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_jump.bin") +// ram.memory.loadBin(0x80000000l, "../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_payload.bin") + ram.memory.loadBin(0x80000000l, "../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_jump.bin") + ram.memory.loadBin(0xC0000000l, "../buildroot/output/images/Image") + ram.memory.loadBin(0xC1000000l, "../buildroot/output/images/dtb") + ram.memory.loadBin(0xC2000000l, "../buildroot/output/images/rootfs.cpio") + +// fork{ +// disableSimWave() +// val atMs = 130 +// val durationMs = 15 +// sleep(atMs*1000000) +// enableSimWave() +// println("** enableSimWave **") +// sleep(durationMs*1000000) +// println("** disableSimWave **") +// while(true) { +// disableSimWave() +// sleep(100000 * 10) +// enableSimWave() +// sleep( 100 * 10) +// } +//// simSuccess() +// } + + fork{ + while(true) { + disableSimWave() + sleep(100000 * 10) + 
enableSimWave() + sleep( 100 * 10) + } + } } } \ No newline at end of file From b389878d2323002d6619981a5a82cc4581cf2715 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Tue, 21 Apr 2020 12:18:10 +0200 Subject: [PATCH 31/91] Add smp consistency check, fix VexRiscv invalidation read during write hazard logic --- .../demo/smp/VexRiscvSmpCluster.scala | 51 ++- src/main/scala/vexriscv/ip/DataCache.scala | 46 +- .../vexriscv/plugin/DBusCachedPlugin.scala | 27 +- src/test/cpp/raw/smp/build/smp.asm | 409 +++++++++++++----- src/test/cpp/raw/smp/build/smp.bin | Bin 504 -> 1144 bytes src/test/cpp/raw/smp/src/crt.S | 145 ++++++- 6 files changed, 517 insertions(+), 161 deletions(-) diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala index b490654..0371b29 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -292,6 +292,7 @@ object VexRiscvSmpClusterTestInfrastructure{ val REPORT_END = 0x08 val REPORT_BARRIER_START = 0x0C val REPORT_BARRIER_END = 0x10 + val REPORT_CONSISTENCY_VALUES = 0x14 val PUTC = 0x00 val GETC = 0x04 @@ -310,14 +311,7 @@ object VexRiscvSmpClusterTestInfrastructure{ } } val reports = ArrayBuffer.fill(cpuCount)(ArrayBuffer[Report]()) - onSimEnd{ - for((list, hart) <- reports.zipWithIndex){ - println(f"\n\n**** CPU $hart%2d ****") - for((report, reportId) <- list.zipWithIndex){ - println(f" $reportId%3d : ${report.code}%3x -> ${report.data}%3d") - } - } - } + val writeTable = mutable.HashMap[Int, Int => Unit]() val readTable = mutable.HashMap[Int, () => Int]() @@ -329,6 +323,24 @@ object VexRiscvSmpClusterTestInfrastructure{ var reportWatchdog = 0 val cpuEnd = Array.fill(cpuCount)(false) val barriers = mutable.HashMap[Int, Int]() + var consistancyCounter = 0 + var consistancyLast = 0 + var consistancyA = 0 + var consistancyB = 0 + var consistancyAB = 0 + var consistancyNone = 0 + + onSimEnd{ + for((list, hart) <- reports.zipWithIndex){ + println(f"\n\n**** CPU $hart%2d ****") + for((report, reportId) <- list.zipWithIndex){ + println(f" $reportId%3d : ${report.code}%3x -> ${report.data}%3d") + } + } + + println(s"consistancy NONE:$consistancyNone A:$consistancyA B:$consistancyB AB:$consistancyAB") + } + override def setByte(address: Long, value: Byte): Unit = { if((address & 0xF0000000l) != 0xF0000000l) return super.setByte(address, value) val byteId = address & 3 @@ -344,7 +356,7 @@ object VexRiscvSmpClusterTestInfrastructure{ code = (offset & 0x00FFFF).toInt, data = writeData ) - println(report) +// println(report) reports(report.hart) += report reportWatchdog += 1 import report._ @@ -361,6 +373,21 @@ object VexRiscvSmpClusterTestInfrastructure{ val counter = barriers.getOrElse(data, 0) assert(counter == cpuCount) } + case REPORT_CONSISTENCY_VALUES => consistancyCounter match { + case 0 => { + consistancyCounter = 1 + consistancyLast = data + } + case 1 => { + consistancyCounter = 0 + (data, consistancyLast) match { + case (666, 0) => consistancyA += 1 + case (0, 666) => consistancyB += 1 + case (666, 666) => consistancyAB += 1 + case (0,0) => consistancyNone += 1; simFailure("Consistancy issue :(") + } + } + } } } case _ => writeTable.get(offset.toInt) match { @@ -440,7 +467,7 @@ object VexRiscvSmpClusterTest extends App{ import spinal.core.sim._ val simConfig = SimConfig -// simConfig.withWave + simConfig.withWave simConfig.allOptimisation simConfig.addSimulatorFlag("--threads 1") @@ -448,7 +475,9 @@ object 
VexRiscvSmpClusterTest extends App{ val withStall = true simConfig.compile(VexRiscvSmpClusterGen.vexRiscvCluster(cpuCount)).doSimUntilVoid(seed = 42){dut => - SimTimeout(10000*10*cpuCount) + disableSimWave() + SimTimeout(100000000l*10*cpuCount) + dut.clockDomain.forkSimSpeedPrinter(1.0) VexRiscvSmpClusterTestInfrastructure.init(dut) val ram = VexRiscvSmpClusterTestInfrastructure.ram(dut) ram.memory.loadBin(0x80000000l, "src/test/cpp/raw/smp/build/smp.bin") diff --git a/src/main/scala/vexriscv/ip/DataCache.scala b/src/main/scala/vexriscv/ip/DataCache.scala index d685a34..3464165 100644 --- a/src/main/scala/vexriscv/ip/DataCache.scala +++ b/src/main/scala/vexriscv/ip/DataCache.scala @@ -102,10 +102,10 @@ case class DataCacheCpuExecute(p : DataCacheConfig) extends Bundle with IMasterS val address = UInt(p.addressWidth bit) val haltIt = Bool val args = DataCacheCpuExecuteArgs(p) - val fence = Bool() + val totalyConsistent = Bool() override def asMaster(): Unit = { - out(isValid, args, address, fence) + out(isValid, args, address, totalyConsistent) in(haltIt) } } @@ -129,9 +129,10 @@ case class DataCacheCpuMemory(p : DataCacheConfig) extends Bundle with IMasterSl val isWrite = Bool val address = UInt(p.addressWidth bit) val mmuBus = MemoryTranslatorBus() + val fenceValid = Bool() override def asMaster(): Unit = { - out(isValid, isStuck, isRemoved, address) + out(isValid, isStuck, isRemoved, address, fenceValid) in(isWrite) slave(mmuBus) } @@ -148,11 +149,13 @@ case class DataCacheCpuWriteBack(p : DataCacheConfig) extends Bundle with IMaste val address = UInt(p.addressWidth bit) val mmuException, unalignedAccess, accessError = Bool() val keepMemRspData = Bool() //Used by external AMO to avoid having an internal buffer + val fenceValid = Bool() + val fenceFire = Bool() // val exceptionBus = if(p.catchSomething) Flow(ExceptionCause()) else null override def asMaster(): Unit = { - out(isValid,isStuck,isUser, address) + out(isValid,isStuck,isUser, address, fenceValid, fenceFire) in(haltIt, data, mmuException, unalignedAccess, accessError, isWrite, keepMemRspData) } } @@ -514,7 +517,7 @@ class DataCache(val p : DataCacheConfig) extends Component{ val counter = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0) counter := counter + U(io.mem.cmd.fire && io.mem.cmd.last) - U(io.mem.rsp.valid && io.mem.rsp.last) - val consistent = counter === 0 + val done = counter === 0 val full = RegNext(counter.msb) val last = counter === 1 @@ -529,16 +532,26 @@ class DataCache(val p : DataCacheConfig) extends Component{ val sync = withInvalidate generate new Area{ io.mem.sync.ready := True - val counter = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0) - counter := counter + U(io.mem.cmd.fire && io.mem.cmd.wr) - U(io.mem.sync.fire) + val pendingSync = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0) + val pendingSyncNext = pendingSync + U(io.mem.cmd.fire && io.mem.cmd.wr) - U(io.mem.sync.fire) + pendingSync := pendingSyncNext - val full = RegNext(counter.msb) + val full = RegNext(pendingSync.msb) io.cpu.execute.haltIt setWhen(full) - val consistent = counter === 0 + + val incoerentSync = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0) + incoerentSync := incoerentSync - U(io.mem.sync.fire && incoerentSync =/= 0) + when(io.cpu.writeBack.fenceValid){ incoerentSync := pendingSyncNext } + + + val totalyConsistent = pendingSync === 0 + val fenceConsistent = incoerentSync === 0 } + + val stage0 = new Area{ val mask = io.cpu.execute.size.mux ( U(0) -> B"0001", @@ -548,10 +561,14 @@ class DataCache(val p : DataCacheConfig) extends 
Component{ val dataColisions = collisionProcess(io.cpu.execute.address(lineRange.high downto wordRange.low), mask) val wayInvalidate = B(0, wayCount bits) //Used if invalidate enabled - when(io.cpu.execute.fence){ - val consistent = if(withInvalidate) sync.consistent else if(withWriteResponse) pending.consistent else null - if(consistent != null){ - when(!consistent || io.cpu.memory.isValid && io.cpu.memory.isWrite || io.cpu.writeBack.isValid && io.cpu.memory.isWrite){ + val isAmo = if(withAmo) io.cpu.execute.isAmo else False + + //Ensure write to read consistency + val consistancyCheck = (withInvalidate || withWriteResponse) generate new Area { + val fenceConsistent = (if(withInvalidate) sync.fenceConsistent else pending.done) && !io.cpu.writeBack.fenceValid && !io.cpu.memory.fenceValid //Pessimistic fence tracking + val totalyConsistent = (if(withInvalidate) sync.totalyConsistent else pending.done) && !(io.cpu.memory.isValid && io.cpu.memory.isWrite) && !(io.cpu.writeBack.isValid && io.cpu.memory.isWrite) + when(io.cpu.execute.isValid && (!io.cpu.execute.args.wr || isAmo)){ + when(!fenceConsistent || io.cpu.execute.totalyConsistent && !totalyConsistent){ io.cpu.execute.haltIt := True } } @@ -632,7 +649,6 @@ class DataCache(val p : DataCacheConfig) extends Component{ } } - val lrSc = withInternalLrSc generate new Area{ val reserved = RegInit(False) when(io.cpu.writeBack.isValid && !io.cpu.writeBack.isStuck && request.isLrsc @@ -923,7 +939,7 @@ class DataCache(val p : DataCacheConfig) extends Component{ io.mem.ack.hit := wayHit //Manage invalidation read during write hazard - s1.invalidations := RegNext((input.valid && input.enable) ? wayHits | 0) + s1.invalidations := RegNextWhen((input.valid && input.enable && input.address(lineRange) === s0.input.address(lineRange)) ? 
wayHits | 0, s0.input.ready) } } } \ No newline at end of file diff --git a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala index 769ed07..82c7953 100644 --- a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala +++ b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala @@ -50,7 +50,8 @@ class DBusCachedPlugin(val config : DataCacheConfig, object MEMORY_LRSC extends Stageable(Bool) object MEMORY_AMO extends Stageable(Bool) object MEMORY_FENCE extends Stageable(Bool) - object MEMORY_FENCE_DECODED extends Stageable(Bool) + object MEMORY_FENCE_FRONT extends Stageable(Bool) + object MEMORY_FENCE_BACK extends Stageable(Bool) object IS_DBUS_SHARING extends Stageable(Bool()) object MEMORY_VIRTUAL_ADDRESS extends Stageable(UInt(32 bits)) @@ -224,17 +225,22 @@ class DBusCachedPlugin(val config : DataCacheConfig, def PS = PW || PO } + //Manage write to read hit ordering (ensure invalidation timings) val fence = new Area{ - val hazard = False + insert(MEMORY_FENCE_FRONT) := False + insert(MEMORY_FENCE_BACK) := False val ff = input(INSTRUCTION)(31 downto 20).as(FenceFlags()) if(withWriteResponse){ - hazard setWhen(input(MEMORY_FENCE) && (ff.PS && ff.SL)) //Manage write to read hit ordering (ensure invalidation timings) - when(input(INSTRUCTION)(26 downto 25) =/= 0){ - if(withLrSc) hazard setWhen(input(MEMORY_LRSC)) - if(withAmo) hazard setWhen(input(MEMORY_AMO)) + insert(MEMORY_FENCE_BACK) setWhen(input(MEMORY_FENCE) && (ff.PS && ff.SL)) + when(input(INSTRUCTION)(26)) { //AQ + if(withLrSc) insert(MEMORY_FENCE_BACK) setWhen(input(MEMORY_LRSC)) + if(withAmo) insert(MEMORY_FENCE_BACK) setWhen(input(MEMORY_AMO)) + } + when(input(INSTRUCTION)(25)) { //RL but a bit pessimistic as could be MEMORY_FENCE_BACK when the memory op isn't a read + if(withLrSc) insert(MEMORY_FENCE_FRONT) setWhen(input(MEMORY_LRSC)) + if(withAmo) insert(MEMORY_FENCE_FRONT) setWhen(input(MEMORY_AMO)) } } - insert(MEMORY_FENCE_DECODED) := hazard } } @@ -254,7 +260,7 @@ class DBusCachedPlugin(val config : DataCacheConfig, cache.io.cpu.flush.valid := arbitration.isValid && input(MEMORY_MANAGMENT) - cache.io.cpu.execute.fence := arbitration.isValid && input(MEMORY_FENCE_DECODED) + cache.io.cpu.execute.totalyConsistent := arbitration.isValid && input(MEMORY_FENCE_FRONT) arbitration.haltItself setWhen(cache.io.cpu.flush.isStall || cache.io.cpu.execute.haltIt) if(withLrSc) { @@ -296,6 +302,8 @@ class DBusCachedPlugin(val config : DataCacheConfig, cache.io.cpu.memory.mmuBus <> mmuBus cache.io.cpu.memory.mmuBus.rsp.isIoAccess setWhen(pipeline(DEBUG_BYPASS_CACHE) && !cache.io.cpu.memory.isWrite) + + cache.io.cpu.memory.fenceValid := arbitration.isValid && input(MEMORY_FENCE_BACK) } val managementStage = stages.last @@ -306,6 +314,9 @@ class DBusCachedPlugin(val config : DataCacheConfig, cache.io.cpu.writeBack.isUser := (if(privilegeService != null) privilegeService.isUser() else False) cache.io.cpu.writeBack.address := U(input(REGFILE_WRITE_DATA)) + cache.io.cpu.writeBack.fenceValid := arbitration.isValid && input(MEMORY_FENCE_BACK) + cache.io.cpu.writeBack.fenceFire := arbitration.isFiring && input(MEMORY_FENCE_BACK) + redoBranch.valid := False redoBranch.payload := input(PC) arbitration.flushIt setWhen(redoBranch.valid) diff --git a/src/test/cpp/raw/smp/build/smp.asm b/src/test/cpp/raw/smp/build/smp.asm index 06f2616..19fc727 100644 --- a/src/test/cpp/raw/smp/build/smp.asm +++ b/src/test/cpp/raw/smp/build/smp.asm @@ -10,24 +10,24 @@ Disassembly of section .crt_section: 80000008: 
f1402373 csrr t1,mhartid 8000000c: 01031313 slli t1,t1,0x10 80000010: 006282b3 add t0,t0,t1 -80000014: 0082a023 sw s0,0(t0) # f8000000 +80000014: 0082a023 sw s0,0(t0) # f8000000 80000018 : 80000018: 00100513 li a0,1 8000001c: 00000597 auipc a1,0x0 -80000020: 1d058593 addi a1,a1,464 # 800001ec +80000020: 36058593 addi a1,a1,864 # 8000037c 80000024: 00a5a02f amoadd.w zero,a0,(a1) 80000028 : 80000028: 00000417 auipc s0,0x0 -8000002c: 1c442403 lw s0,452(s0) # 800001ec +8000002c: 35442403 lw s0,852(s0) # 8000037c 80000030: 19000513 li a0,400 -80000034: 1ac000ef jal ra,800001e0 +80000034: 33c000ef jal ra,80000370 80000038: 00000497 auipc s1,0x0 -8000003c: 1b44a483 lw s1,436(s1) # 800001ec +8000003c: 3444a483 lw s1,836(s1) # 8000037c 80000040: fe8494e3 bne s1,s0,80000028 80000044: f80002b7 lui t0,0xf8000 -80000048: 00428293 addi t0,t0,4 # f8000004 +80000048: 00428293 addi t0,t0,4 # f8000004 8000004c: f1402373 csrr t1,mhartid 80000050: 01031313 slli t1,t1,0x10 80000054: 006282b3 add t0,t0,t1 @@ -35,132 +35,319 @@ Disassembly of section .crt_section: 8000005c : 8000005c: 00100513 li a0,1 -80000060: 040000ef jal ra,800000a0 +80000060: 1d0000ef jal ra,80000230 80000064: 00200513 li a0,2 -80000068: 038000ef jal ra,800000a0 +80000068: 1c8000ef jal ra,80000230 8000006c: 00300513 li a0,3 -80000070: 030000ef jal ra,800000a0 +80000070: 1c0000ef jal ra,80000230 80000074: 00400513 li a0,4 -80000078: 0a4000ef jal ra,8000011c +80000078: 234000ef jal ra,800002ac 8000007c: 00500513 li a0,5 -80000080: 09c000ef jal ra,8000011c +80000080: 22c000ef jal ra,800002ac 80000084: 00600513 li a0,6 -80000088: 094000ef jal ra,8000011c +80000088: 224000ef jal ra,800002ac 8000008c: 00700513 li a0,7 -80000090: 010000ef jal ra,800000a0 +80000090: 1a0000ef jal ra,80000230 80000094: 00800513 li a0,8 -80000098: 084000ef jal ra,8000011c -8000009c: 1000006f j 8000019c +80000098: 214000ef jal ra,800002ac +8000009c: 00000197 auipc gp,0x0 +800000a0: 2ec1a183 lw gp,748(gp) # 80000388 -800000a0 : -800000a0: f80002b7 lui t0,0xf8000 -800000a4: 00c28293 addi t0,t0,12 # f800000c -800000a8: f1402373 csrr t1,mhartid -800000ac: 01031313 slli t1,t1,0x10 -800000b0: 006282b3 add t0,t0,t1 -800000b4: 00a2a023 sw a0,0(t0) -800000b8: 00000e97 auipc t4,0x0 -800000bc: 13ceae83 lw t4,316(t4) # 800001f4 +800000a4 : +800000a4: 00018513 mv a0,gp +800000a8: 00118193 addi gp,gp,1 +800000ac: 200000ef jal ra,800002ac +800000b0: 00000297 auipc t0,0x0 +800000b4: 2e42a283 lw t0,740(t0) # 80000394 +800000b8: 00a00313 li t1,10 +800000bc: 1662d863 bge t0,t1,8000022c 800000c0: 00000297 auipc t0,0x0 -800000c4: 13028293 addi t0,t0,304 # 800001f0 -800000c8: 00100313 li t1,1 -800000cc: 0062a2af amoadd.w t0,t1,(t0) -800000d0: 00128293 addi t0,t0,1 -800000d4: 00000317 auipc t1,0x0 -800000d8: 11832303 lw t1,280(t1) # 800001ec -800000dc: 00629c63 bne t0,t1,800000f4 -800000e0: 001e8293 addi t0,t4,1 -800000e4: 00000317 auipc t1,0x0 -800000e8: 10032623 sw zero,268(t1) # 800001f0 -800000ec: 00000317 auipc t1,0x0 -800000f0: 10532423 sw t0,264(t1) # 800001f4 +800000c4: 2cc2a283 lw t0,716(t0) # 8000038c +800000c8: 00000317 auipc t1,0x0 +800000cc: 2c832303 lw t1,712(t1) # 80000390 +800000d0: 06628a63 beq t0,t1,80000144 +800000d4: f14022f3 csrr t0,mhartid +800000d8: 00000317 auipc t1,0x0 +800000dc: 2b432303 lw t1,692(t1) # 8000038c +800000e0: 00000417 auipc s0,0x0 +800000e4: 32040413 addi s0,s0,800 # 80000400 +800000e8: 00000497 auipc s1,0x0 +800000ec: 31c48493 addi s1,s1,796 # 80000404 +800000f0: 02628863 beq t0,t1,80000120 +800000f4: 00000317 auipc t1,0x0 +800000f8: 29c32303 lw 
t1,668(t1) # 80000390 +800000fc: 00000417 auipc s0,0x0 +80000100: 30840413 addi s0,s0,776 # 80000404 +80000104: 00000497 auipc s1,0x0 +80000108: 2fc48493 addi s1,s1,764 # 80000400 +8000010c: 00628a63 beq t0,t1,80000120 -800000f4 : -800000f4: 00000297 auipc t0,0x0 -800000f8: 1002a283 lw t0,256(t0) # 800001f4 -800000fc: ffd28ce3 beq t0,t4,800000f4 -80000100: f80002b7 lui t0,0xf8000 -80000104: 01028293 addi t0,t0,16 # f8000010 -80000108: f1402373 csrr t1,mhartid -8000010c: 01031313 slli t1,t1,0x10 -80000110: 006282b3 add t0,t0,t1 -80000114: 00a2a023 sw a0,0(t0) -80000118: 00008067 ret +80000110 : +80000110: 00018513 mv a0,gp +80000114: 00118193 addi gp,gp,1 +80000118: 194000ef jal ra,800002ac +8000011c: 0280006f j 80000144 -8000011c : -8000011c: f80002b7 lui t0,0xf8000 -80000120: 00c28293 addi t0,t0,12 # f800000c -80000124: f1402373 csrr t1,mhartid -80000128: 01031313 slli t1,t1,0x10 -8000012c: 006282b3 add t0,t0,t1 -80000130: 00a2a023 sw a0,0(t0) -80000134: 00000e97 auipc t4,0x0 -80000138: 0c0eae83 lw t4,192(t4) # 800001f4 -8000013c: 00000297 auipc t0,0x0 -80000140: 0b428293 addi t0,t0,180 # 800001f0 +80000120 : +80000120: 29a00913 li s2,666 +80000124: 00018513 mv a0,gp +80000128: 00118193 addi gp,gp,1 +8000012c: 0004a983 lw s3,0(s1) +80000130: 17c000ef jal ra,800002ac +80000134: 01242023 sw s2,0(s0) +80000138: 0120000f fence w,r +8000013c: 0004a983 lw s3,0(s1) +80000140: 05342023 sw s3,64(s0) -80000144 : -80000144: 1002a32f lr.w t1,(t0) -80000148: 00130313 addi t1,t1,1 -8000014c: 1862a3af sc.w t2,t1,(t0) -80000150: fe039ae3 bnez t2,80000144 -80000154: 00000297 auipc t0,0x0 -80000158: 0982a283 lw t0,152(t0) # 800001ec -8000015c: 00629c63 bne t0,t1,80000174 -80000160: 001e8293 addi t0,t4,1 +80000144 : +80000144: 0330000f fence rw,rw +80000148: 00018513 mv a0,gp +8000014c: 00118193 addi gp,gp,1 +80000150: 15c000ef jal ra,800002ac +80000154: f14022f3 csrr t0,mhartid +80000158: f40296e3 bnez t0,800000a4 + +8000015c : +8000015c: 00000297 auipc t0,0x0 +80000160: 2302a283 lw t0,560(t0) # 8000038c 80000164: 00000317 auipc t1,0x0 -80000168: 08032623 sw zero,140(t1) # 800001f0 -8000016c: 00000317 auipc t1,0x0 -80000170: 08532423 sw t0,136(t1) # 800001f4 +80000168: 22c32303 lw t1,556(t1) # 80000390 +8000016c: 04628263 beq t0,t1,800001b0 +80000170: 00000517 auipc a0,0x0 +80000174: 2d452503 lw a0,724(a0) # 80000444 +80000178: f80002b7 lui t0,0xf8000 +8000017c: 01428293 addi t0,t0,20 # f8000014 +80000180: f1402373 csrr t1,mhartid +80000184: 01031313 slli t1,t1,0x10 +80000188: 006282b3 add t0,t0,t1 +8000018c: 00a2a023 sw a0,0(t0) +80000190: 00000517 auipc a0,0x0 +80000194: 2b052503 lw a0,688(a0) # 80000440 +80000198: f80002b7 lui t0,0xf8000 +8000019c: 01428293 addi t0,t0,20 # f8000014 +800001a0: f1402373 csrr t1,mhartid +800001a4: 01031313 slli t1,t1,0x10 +800001a8: 006282b3 add t0,t0,t1 +800001ac: 00a2a023 sw a0,0(t0) -80000174 : -80000174: 00000297 auipc t0,0x0 -80000178: 0802a283 lw t0,128(t0) # 800001f4 -8000017c: ffd28ce3 beq t0,t4,80000174 -80000180: f80002b7 lui t0,0xf8000 -80000184: 01028293 addi t0,t0,16 # f8000010 -80000188: f1402373 csrr t1,mhartid -8000018c: 01031313 slli t1,t1,0x10 -80000190: 006282b3 add t0,t0,t1 -80000194: 00a2a023 sw a0,0(t0) -80000198: 00008067 ret +800001b0 : +800001b0: f14022f3 csrr t0,mhartid +800001b4: ee0298e3 bnez t0,800000a4 +800001b8: 00000297 auipc t0,0x0 +800001bc: 2402a423 sw zero,584(t0) # 80000400 +800001c0: 00000297 auipc t0,0x0 +800001c4: 2402a223 sw zero,580(t0) # 80000404 +800001c8: 00000417 auipc s0,0x0 +800001cc: 1b442403 lw s0,436(s0) # 8000037c 
+800001d0: 00000297 auipc t0,0x0 +800001d4: 1c02a283 lw t0,448(t0) # 80000390 +800001d8: 00128293 addi t0,t0,1 +800001dc: 00000317 auipc t1,0x0 +800001e0: 1a532a23 sw t0,436(t1) # 80000390 +800001e4: 04829063 bne t0,s0,80000224 +800001e8: 00000317 auipc t1,0x0 +800001ec: 1a032423 sw zero,424(t1) # 80000390 +800001f0: 00000297 auipc t0,0x0 +800001f4: 19c2a283 lw t0,412(t0) # 8000038c +800001f8: 00128293 addi t0,t0,1 +800001fc: 00000317 auipc t1,0x0 +80000200: 18532823 sw t0,400(t1) # 8000038c +80000204: 02829063 bne t0,s0,80000224 +80000208: 00000317 auipc t1,0x0 +8000020c: 18032223 sw zero,388(t1) # 8000038c +80000210: 00000297 auipc t0,0x0 +80000214: 1842a283 lw t0,388(t0) # 80000394 +80000218: 00128293 addi t0,t0,1 +8000021c: 00000317 auipc t1,0x0 +80000220: 16532c23 sw t0,376(t1) # 80000394 -8000019c : -8000019c: 00000413 li s0,0 -800001a0: f80002b7 lui t0,0xf8000 -800001a4: 00828293 addi t0,t0,8 # f8000008 -800001a8: f1402373 csrr t1,mhartid -800001ac: 01031313 slli t1,t1,0x10 -800001b0: 006282b3 add t0,t0,t1 -800001b4: 0082a023 sw s0,0(t0) -800001b8: 0240006f j 800001dc +80000224 : +80000224: 0130000f fence w,rw +80000228: e7dff06f j 800000a4 -800001bc : -800001bc: 00100413 li s0,1 -800001c0: f80002b7 lui t0,0xf8000 -800001c4: 00828293 addi t0,t0,8 # f8000008 -800001c8: f1402373 csrr t1,mhartid -800001cc: 01031313 slli t1,t1,0x10 -800001d0: 006282b3 add t0,t0,t1 -800001d4: 0082a023 sw s0,0(t0) -800001d8: 0040006f j 800001dc +8000022c : +8000022c: 1000006f j 8000032c -800001dc : -800001dc: 0000006f j 800001dc +80000230 : +80000230: f80002b7 lui t0,0xf8000 +80000234: 00c28293 addi t0,t0,12 # f800000c +80000238: f1402373 csrr t1,mhartid +8000023c: 01031313 slli t1,t1,0x10 +80000240: 006282b3 add t0,t0,t1 +80000244: 00a2a023 sw a0,0(t0) +80000248: 00000e97 auipc t4,0x0 +8000024c: 13ceae83 lw t4,316(t4) # 80000384 +80000250: 00000297 auipc t0,0x0 +80000254: 13028293 addi t0,t0,304 # 80000380 +80000258: 00100313 li t1,1 +8000025c: 0062a2af amoadd.w t0,t1,(t0) +80000260: 00128293 addi t0,t0,1 +80000264: 00000317 auipc t1,0x0 +80000268: 11832303 lw t1,280(t1) # 8000037c +8000026c: 00629c63 bne t0,t1,80000284 +80000270: 001e8293 addi t0,t4,1 +80000274: 00000317 auipc t1,0x0 +80000278: 10032623 sw zero,268(t1) # 80000380 +8000027c: 00000317 auipc t1,0x0 +80000280: 10532423 sw t0,264(t1) # 80000384 -800001e0 : -800001e0: fff50513 addi a0,a0,-1 -800001e4: fe051ee3 bnez a0,800001e0 -800001e8: 00008067 ret +80000284 : +80000284: 00000297 auipc t0,0x0 +80000288: 1002a283 lw t0,256(t0) # 80000384 +8000028c: ffd28ce3 beq t0,t4,80000284 +80000290: f80002b7 lui t0,0xf8000 +80000294: 01028293 addi t0,t0,16 # f8000010 +80000298: f1402373 csrr t1,mhartid +8000029c: 01031313 slli t1,t1,0x10 +800002a0: 006282b3 add t0,t0,t1 +800002a4: 00a2a023 sw a0,0(t0) +800002a8: 00008067 ret -800001ec : -800001ec: 0000 unimp +800002ac : +800002ac: f80002b7 lui t0,0xf8000 +800002b0: 00c28293 addi t0,t0,12 # f800000c +800002b4: f1402373 csrr t1,mhartid +800002b8: 01031313 slli t1,t1,0x10 +800002bc: 006282b3 add t0,t0,t1 +800002c0: 00a2a023 sw a0,0(t0) +800002c4: 00000e97 auipc t4,0x0 +800002c8: 0c0eae83 lw t4,192(t4) # 80000384 +800002cc: 00000297 auipc t0,0x0 +800002d0: 0b428293 addi t0,t0,180 # 80000380 + +800002d4 : +800002d4: 1002a32f lr.w t1,(t0) +800002d8: 00130313 addi t1,t1,1 +800002dc: 1862a3af sc.w t2,t1,(t0) +800002e0: fe039ae3 bnez t2,800002d4 +800002e4: 00000297 auipc t0,0x0 +800002e8: 0982a283 lw t0,152(t0) # 8000037c +800002ec: 00629c63 bne t0,t1,80000304 +800002f0: 001e8293 addi t0,t4,1 +800002f4: 00000317 
auipc t1,0x0 +800002f8: 08032623 sw zero,140(t1) # 80000380 +800002fc: 00000317 auipc t1,0x0 +80000300: 08532423 sw t0,136(t1) # 80000384 + +80000304 : +80000304: 00000297 auipc t0,0x0 +80000308: 0802a283 lw t0,128(t0) # 80000384 +8000030c: ffd28ce3 beq t0,t4,80000304 +80000310: f80002b7 lui t0,0xf8000 +80000314: 01028293 addi t0,t0,16 # f8000010 +80000318: f1402373 csrr t1,mhartid +8000031c: 01031313 slli t1,t1,0x10 +80000320: 006282b3 add t0,t0,t1 +80000324: 00a2a023 sw a0,0(t0) +80000328: 00008067 ret + +8000032c : +8000032c: 00000413 li s0,0 +80000330: f80002b7 lui t0,0xf8000 +80000334: 00828293 addi t0,t0,8 # f8000008 +80000338: f1402373 csrr t1,mhartid +8000033c: 01031313 slli t1,t1,0x10 +80000340: 006282b3 add t0,t0,t1 +80000344: 0082a023 sw s0,0(t0) +80000348: 0240006f j 8000036c + +8000034c : +8000034c: 00100413 li s0,1 +80000350: f80002b7 lui t0,0xf8000 +80000354: 00828293 addi t0,t0,8 # f8000008 +80000358: f1402373 csrr t1,mhartid +8000035c: 01031313 slli t1,t1,0x10 +80000360: 006282b3 add t0,t0,t1 +80000364: 0082a023 sw s0,0(t0) +80000368: 0040006f j 8000036c + +8000036c : +8000036c: 0000006f j 8000036c + +80000370 : +80000370: fff50513 addi a0,a0,-1 +80000374: fe051ee3 bnez a0,80000370 +80000378: 00008067 ret + +8000037c : +8000037c: 0000 unimp ... -800001f0 : -800001f0: 0000 unimp +80000380 : +80000380: 0000 unimp ... -800001f4 : -800001f4: 0000 unimp +80000384 : +80000384: 0000 unimp + ... + +80000388 : +80000388: 1000 addi s0,sp,32 + ... + +8000038c : +8000038c: 0000 unimp + ... + +80000390 : +80000390: 0000 unimp + ... + +80000394 : +80000394: 0000 unimp +80000396: 0000 unimp +80000398: 00000013 nop +8000039c: 00000013 nop +800003a0: 00000013 nop +800003a4: 00000013 nop +800003a8: 00000013 nop +800003ac: 00000013 nop +800003b0: 00000013 nop +800003b4: 00000013 nop +800003b8: 00000013 nop +800003bc: 00000013 nop +800003c0: 00000013 nop +800003c4: 00000013 nop +800003c8: 00000013 nop +800003cc: 00000013 nop +800003d0: 00000013 nop +800003d4: 00000013 nop +800003d8: 00000013 nop +800003dc: 00000013 nop +800003e0: 00000013 nop +800003e4: 00000013 nop +800003e8: 00000013 nop +800003ec: 00000013 nop +800003f0: 00000013 nop +800003f4: 00000013 nop +800003f8: 00000013 nop +800003fc: 00000013 nop + +80000400 : +80000400: 0000 unimp + ... + +80000404 : +80000404: 0000 unimp +80000406: 0000 unimp +80000408: 00000013 nop +8000040c: 00000013 nop +80000410: 00000013 nop +80000414: 00000013 nop +80000418: 00000013 nop +8000041c: 00000013 nop +80000420: 00000013 nop +80000424: 00000013 nop +80000428: 00000013 nop +8000042c: 00000013 nop +80000430: 00000013 nop +80000434: 00000013 nop +80000438: 00000013 nop +8000043c: 00000013 nop + +80000440 : +80000440: 0000 unimp + ... + +80000444 : ... 
diff --git a/src/test/cpp/raw/smp/build/smp.bin b/src/test/cpp/raw/smp/build/smp.bin index 59a832fee66759269250ede3b78662b6fdd2d1d3..b391a14e6fda4e289fe59a0e77fca47434fcb461 100755 GIT binary patch literal 1144 zcmcgqJ#Q015S`n-MUFtpg%R-wAv(MVBE-Z70`xTqNq>=iateOqPRw}TSRePV~RI5UqI2&_+;B6ji;|xanpk6*1YE2Hlw|`x{AC5 z;rn30;9e=q$)9YN!aKpCFGYV~@Rb$q8*Tf7_8Ya{oDXjY%y6%l4cDrSxH9F!Pc1)R zhX%u$&hmJ#`P?QC-K=yyYYtpXW9hDmIjHdQ8RCy={p2D0YP>Ny(kq54u(LE%e(LJE z(afyLnZ;muvGO}Ut^J67(zWF_^U@eIPxnIib6n$Q%oZbc6tu29TPuY3l%1|UUNC$w zzt5kk<%mybB8xm{_-bySFVz;PKdb)v!^b*+vzy*^Z|bgNXwQ=Fi?^urBgN<`Ij|E4 zv^GBIZsx-$8ZQ}Nr@3Q1)f}VAoaFAiJyW0kGM5_-fP=MXj~MOh;qn%9kR$AD1-axg zADa4VW=cOZ($5&4W&Wg(G-KmIP4%y!4n9NMIf$n-^cKapy<|xYbw3lIGl2jJ{*QmG M`mb8!{J)Jq0A*P1s{jB1 delta 102 zcmeyt@q>ASvaGEBf~5@NEDQ|HDlRg@tPGOx84gHI2lAVjxJXZQP_|?dW>sK#&(Odu s%xVCn8Gy6{kapk_W(@$+2RMaU6F__*T>z426lQGz@i{i@FvTze0M}I)6#xJL diff --git a/src/test/cpp/raw/smp/src/crt.S b/src/test/cpp/raw/smp/src/crt.S index 72cc5b8..4f984eb 100644 --- a/src/test/cpp/raw/smp/src/crt.S +++ b/src/test/cpp/raw/smp/src/crt.S @@ -1,9 +1,13 @@ +#define CONSISTENCY_REDO_COUNT 10 + + #define REPORT_OFFSET 0xF8000000 #define REPORT_THREAD_ID 0x00 #define REPORT_THREAD_COUNT 0x04 #define REPORT_END 0x08 #define REPORT_BARRIER_START 0x0C #define REPORT_BARRIER_END 0x10 +#define REPORT_CONSISTENCY_VALUES 0x14 #define report(reg, id) \ li t0, REPORT_OFFSET+id; \ @@ -54,6 +58,92 @@ barrier_amo_test: call barrier_lrsc + lw gp, barrier_allocator +consistancy_loop: + //Sync + mv a0, gp + addi gp, gp, 1 + call barrier_lrsc + + //all passed ? + lw t0, consistancy_all_tested + li t1, CONSISTENCY_REDO_COUNT + bge t0, t1, consistancy_passed + + //identify who is A, who is B + lw t0, consistancy_a_hart + lw t1, consistancy_b_hart + beq t0, t1, consistancy_join + csrr t0, mhartid + lw t1, consistancy_a_hart + la s0, consistancy_a_value + la s1, consistancy_b_value + beq t0, t1, consistancy_do + lw t1, consistancy_b_hart + la s0, consistancy_b_value + la s1, consistancy_a_value + beq t0, t1, consistancy_do + +consistancy_hart_not_involved: + mv a0, gp + addi gp, gp, 1 + call barrier_lrsc + j consistancy_join + +consistancy_do: + li s2, 666 + mv a0, gp + addi gp, gp, 1 + lw s3, (s1) //Help getting the cache loaded for the consistancy check + call barrier_lrsc + + //Consistancy check : write to read ordering on two thread + sw s2, (s0) + fence w,r + lw s3, (s1) + sw s3, 64(s0) + +consistancy_join: + fence rw, rw //ensure updated values + mv a0, gp + addi gp, gp, 1 + call barrier_lrsc + csrr t0, mhartid + bnez t0, consistancy_loop + +consistancy_assert: + lw t0, consistancy_a_hart + lw t1, consistancy_b_hart + beq t0, t1, consistancy_increment + lw a0, consistancy_a_readed + report(a0, REPORT_CONSISTENCY_VALUES) + lw a0, consistancy_b_readed + report(a0, REPORT_CONSISTENCY_VALUES) + +consistancy_increment: + csrr t0, mhartid + bnez t0, consistancy_loop + sw x0, (consistancy_a_value), t0 + sw x0, (consistancy_b_value), t0 + lw s0,thread_count + lw t0,consistancy_b_hart + addi t0, t0, 1 + sw t0, consistancy_b_hart, t1 + bne t0, s0, consistancy_increment_fence + sw x0, consistancy_b_hart, t1 + lw t0,consistancy_a_hart + addi t0, t0, 1 + sw t0, consistancy_a_hart, t1 + bne t0, s0, consistancy_increment_fence + sw x0, consistancy_a_hart, t1 + lw t0, consistancy_all_tested + addi t0, t0, 1 + sw t0, consistancy_all_tested, t1 +consistancy_increment_fence: + fence w, rw + j consistancy_loop + +consistancy_passed: j success @@ -78,24 +168,25 @@ barrier_amo_wait: ret barrier_lrsc: - report(a0, 
REPORT_BARRIER_START) - lw ENTRY_PHASE, barrier_phase - la t0, barrier_value + report(a0, REPORT_BARRIER_START) + lw ENTRY_PHASE, barrier_phase + la t0, barrier_value barrier_lrsc_try: - lr.w t1, (t0) - addi t1, t1, 1 - sc.w t2, t1, (t0) - bnez t2, barrier_lrsc_try - lw t0, thread_count - bne t0, t1, barrier_lrsc_wait - addi t0,ENTRY_PHASE,1 - sw x0, barrier_value, t1 - sw t0, barrier_phase, t1 + lr.w t1, (t0) + addi t1, t1, 1 + sc.w t2, t1, (t0) + bnez t2, barrier_lrsc_try + lw t0, thread_count + bne t0, t1, barrier_lrsc_wait + addi t0,ENTRY_PHASE,1 + sw x0, barrier_value, t1 + sw t0, barrier_phase, t1 barrier_lrsc_wait: - lw t0, barrier_phase - beq t0, ENTRY_PHASE, barrier_lrsc_wait - report(a0, REPORT_BARRIER_END) - ret + lw t0, barrier_phase + beq t0, ENTRY_PHASE, barrier_lrsc_wait + report(a0, REPORT_BARRIER_END) + ret + @@ -120,5 +211,27 @@ sleep: thread_count: .word 0 + +.align 6 //Same cache line barrier_value: .word 0 barrier_phase: .word 0 +barrier_allocator: .word 0x1000 + +consistancy_a_hart: .word 0 +consistancy_b_hart: .word 0 +consistancy_all_tested: .word 0 + + +nop;nop;nop;nop;nop;nop;nop;nop; +nop;nop;nop;nop;nop;nop;nop;nop; +.align 6 //Same cache line +consistancy_a_value: .word 0 +consistancy_b_value: .word 0 + +.align 6 //Same cache line +consistancy_b_readed: .word 0 +consistancy_a_readed: .word 0 + +.align 6 //Same cache line +consistancy_init_call: .word 0 +consistancy_do_call: .word 0 \ No newline at end of file From 056bf638663f69da7142fc2f8fd9d23729eb3387 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Tue, 21 Apr 2020 16:03:03 +0200 Subject: [PATCH 32/91] Add more consistancy tests --- .../demo/smp/VexRiscvSmpCluster.scala | 4 +- .../vexriscv/plugin/DBusCachedPlugin.scala | 2 +- src/test/cpp/raw/smp/build/smp.asm | 660 ++++++++++-------- src/test/cpp/raw/smp/build/smp.bin | Bin 1144 -> 1388 bytes src/test/cpp/raw/smp/src/crt.S | 68 +- 5 files changed, 437 insertions(+), 297 deletions(-) diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala index 0371b29..f823f4f 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -488,7 +488,9 @@ object VexRiscvSmpClusterTest extends App{ } } - +// echo "echo 10000 | dhrystone >> log" > test +// time sh test & +// top -b -n 1 object VexRiscvSmpClusterOpenSbi extends App{ import spinal.core.sim._ diff --git a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala index 82c7953..8308bc0 100644 --- a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala +++ b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala @@ -236,7 +236,7 @@ class DBusCachedPlugin(val config : DataCacheConfig, if(withLrSc) insert(MEMORY_FENCE_BACK) setWhen(input(MEMORY_LRSC)) if(withAmo) insert(MEMORY_FENCE_BACK) setWhen(input(MEMORY_AMO)) } - when(input(INSTRUCTION)(25)) { //RL but a bit pessimistic as could be MEMORY_FENCE_BACK when the memory op isn't a read + when(input(INSTRUCTION)(25)) { //RL but a bit pessimistic as it could be MEMORY_FENCE_BACK when the memory op isn't a read if(withLrSc) insert(MEMORY_FENCE_FRONT) setWhen(input(MEMORY_LRSC)) if(withAmo) insert(MEMORY_FENCE_FRONT) setWhen(input(MEMORY_AMO)) } diff --git a/src/test/cpp/raw/smp/build/smp.asm b/src/test/cpp/raw/smp/build/smp.asm index 19fc727..b0492f9 100644 --- a/src/test/cpp/raw/smp/build/smp.asm +++ b/src/test/cpp/raw/smp/build/smp.asm @@ -10,24 +10,24 @@ Disassembly of 
section .crt_section: 80000008: f1402373 csrr t1,mhartid 8000000c: 01031313 slli t1,t1,0x10 80000010: 006282b3 add t0,t0,t1 -80000014: 0082a023 sw s0,0(t0) # f8000000 +80000014: 0082a023 sw s0,0(t0) # f8000000 80000018 : 80000018: 00100513 li a0,1 8000001c: 00000597 auipc a1,0x0 -80000020: 36058593 addi a1,a1,864 # 8000037c +80000020: 42058593 addi a1,a1,1056 # 8000043c 80000024: 00a5a02f amoadd.w zero,a0,(a1) 80000028 : 80000028: 00000417 auipc s0,0x0 -8000002c: 35442403 lw s0,852(s0) # 8000037c +8000002c: 41442403 lw s0,1044(s0) # 8000043c 80000030: 19000513 li a0,400 -80000034: 33c000ef jal ra,80000370 +80000034: 3fc000ef jal ra,80000430 80000038: 00000497 auipc s1,0x0 -8000003c: 3444a483 lw s1,836(s1) # 8000037c +8000003c: 4044a483 lw s1,1028(s1) # 8000043c 80000040: fe8494e3 bne s1,s0,80000028 80000044: f80002b7 lui t0,0xf8000 -80000048: 00428293 addi t0,t0,4 # f8000004 +80000048: 00428293 addi t0,t0,4 # f8000004 8000004c: f1402373 csrr t1,mhartid 80000050: 01031313 slli t1,t1,0x10 80000054: 006282b3 add t0,t0,t1 @@ -35,319 +35,407 @@ Disassembly of section .crt_section: 8000005c : 8000005c: 00100513 li a0,1 -80000060: 1d0000ef jal ra,80000230 +80000060: 290000ef jal ra,800002f0 80000064: 00200513 li a0,2 -80000068: 1c8000ef jal ra,80000230 +80000068: 288000ef jal ra,800002f0 8000006c: 00300513 li a0,3 -80000070: 1c0000ef jal ra,80000230 +80000070: 280000ef jal ra,800002f0 80000074: 00400513 li a0,4 -80000078: 234000ef jal ra,800002ac +80000078: 2f4000ef jal ra,8000036c 8000007c: 00500513 li a0,5 -80000080: 22c000ef jal ra,800002ac +80000080: 2ec000ef jal ra,8000036c 80000084: 00600513 li a0,6 -80000088: 224000ef jal ra,800002ac +80000088: 2e4000ef jal ra,8000036c 8000008c: 00700513 li a0,7 -80000090: 1a0000ef jal ra,80000230 +80000090: 260000ef jal ra,800002f0 80000094: 00800513 li a0,8 -80000098: 214000ef jal ra,800002ac +80000098: 2d4000ef jal ra,8000036c 8000009c: 00000197 auipc gp,0x0 -800000a0: 2ec1a183 lw gp,748(gp) # 80000388 +800000a0: 3ac1a183 lw gp,940(gp) # 80000448 -800000a4 : -800000a4: 00018513 mv a0,gp -800000a8: 00118193 addi gp,gp,1 -800000ac: 200000ef jal ra,800002ac -800000b0: 00000297 auipc t0,0x0 -800000b4: 2e42a283 lw t0,740(t0) # 80000394 -800000b8: 00a00313 li t1,10 -800000bc: 1662d863 bge t0,t1,8000022c -800000c0: 00000297 auipc t0,0x0 -800000c4: 2cc2a283 lw t0,716(t0) # 8000038c -800000c8: 00000317 auipc t1,0x0 -800000cc: 2c832303 lw t1,712(t1) # 80000390 -800000d0: 06628a63 beq t0,t1,80000144 -800000d4: f14022f3 csrr t0,mhartid -800000d8: 00000317 auipc t1,0x0 -800000dc: 2b432303 lw t1,692(t1) # 8000038c -800000e0: 00000417 auipc s0,0x0 -800000e4: 32040413 addi s0,s0,800 # 80000400 -800000e8: 00000497 auipc s1,0x0 -800000ec: 31c48493 addi s1,s1,796 # 80000404 -800000f0: 02628863 beq t0,t1,80000120 -800000f4: 00000317 auipc t1,0x0 -800000f8: 29c32303 lw t1,668(t1) # 80000390 -800000fc: 00000417 auipc s0,0x0 -80000100: 30840413 addi s0,s0,776 # 80000404 -80000104: 00000497 auipc s1,0x0 -80000108: 2fc48493 addi s1,s1,764 # 80000400 -8000010c: 00628a63 beq t0,t1,80000120 +800000a4 : +800000a4: 00000297 auipc t0,0x0 +800000a8: 06828293 addi t0,t0,104 # 8000010c +800000ac: 00000317 auipc t1,0x0 +800000b0: 48532a23 sw t0,1172(t1) # 80000540 +800000b4: 00000297 auipc t0,0x0 +800000b8: 06028293 addi t0,t0,96 # 80000114 +800000bc: 00000317 auipc t1,0x0 +800000c0: 48532423 sw t0,1160(t1) # 80000544 +800000c4: 00000297 auipc t0,0x0 +800000c8: 01428293 addi t0,t0,20 # 800000d8 +800000cc: 00000317 auipc t1,0x0 +800000d0: 46532e23 sw t0,1148(t1) # 80000548 +800000d4: 
0640006f j 80000138 -80000110 : -80000110: 00018513 mv a0,gp -80000114: 00118193 addi gp,gp,1 -80000118: 194000ef jal ra,800002ac -8000011c: 0280006f j 80000144 +800000d8 : +800000d8: 00000297 auipc t0,0x0 +800000dc: 03428293 addi t0,t0,52 # 8000010c +800000e0: 00000317 auipc t1,0x0 +800000e4: 46532023 sw t0,1120(t1) # 80000540 +800000e8: 00000297 auipc t0,0x0 +800000ec: 04028293 addi t0,t0,64 # 80000128 +800000f0: 00000317 auipc t1,0x0 +800000f4: 44532a23 sw t0,1108(t1) # 80000544 +800000f8: 00000297 auipc t0,0x0 +800000fc: 2f428293 addi t0,t0,756 # 800003ec +80000100: 00000317 auipc t1,0x0 +80000104: 44532423 sw t0,1096(t1) # 80000548 +80000108: 0300006f j 80000138 -80000120 : -80000120: 29a00913 li s2,666 -80000124: 00018513 mv a0,gp -80000128: 00118193 addi gp,gp,1 -8000012c: 0004a983 lw s3,0(s1) -80000130: 17c000ef jal ra,800002ac -80000134: 01242023 sw s2,0(s0) -80000138: 0120000f fence w,r -8000013c: 0004a983 lw s3,0(s1) -80000140: 05342023 sw s3,64(s0) +8000010c : +8000010c: 0004a983 lw s3,0(s1) +80000110: 0c40006f j 800001d4 -80000144 : -80000144: 0330000f fence rw,rw -80000148: 00018513 mv a0,gp -8000014c: 00118193 addi gp,gp,1 -80000150: 15c000ef jal ra,800002ac -80000154: f14022f3 csrr t0,mhartid -80000158: f40296e3 bnez t0,800000a4 +80000114 : +80000114: 01242023 sw s2,0(s0) +80000118: 0120000f fence w,r +8000011c: 0004a983 lw s3,0(s1) +80000120: 05342023 sw s3,64(s0) +80000124: 0cc0006f j 800001f0 -8000015c : -8000015c: 00000297 auipc t0,0x0 -80000160: 2302a283 lw t0,560(t0) # 8000038c -80000164: 00000317 auipc t1,0x0 -80000168: 22c32303 lw t1,556(t1) # 80000390 -8000016c: 04628263 beq t0,t1,800001b0 -80000170: 00000517 auipc a0,0x0 -80000174: 2d452503 lw a0,724(a0) # 80000444 -80000178: f80002b7 lui t0,0xf8000 -8000017c: 01428293 addi t0,t0,20 # f8000014 -80000180: f1402373 csrr t1,mhartid -80000184: 01031313 slli t1,t1,0x10 -80000188: 006282b3 add t0,t0,t1 -8000018c: 00a2a023 sw a0,0(t0) -80000190: 00000517 auipc a0,0x0 -80000194: 2b052503 lw a0,688(a0) # 80000440 -80000198: f80002b7 lui t0,0xf8000 -8000019c: 01428293 addi t0,t0,20 # f8000014 -800001a0: f1402373 csrr t1,mhartid -800001a4: 01031313 slli t1,t1,0x10 -800001a8: 006282b3 add t0,t0,t1 -800001ac: 00a2a023 sw a0,0(t0) +80000128 : +80000128: 01242023 sw s2,0(s0) +8000012c: 1204a9af lr.w.rl s3,(s1) +80000130: 05342023 sw s3,64(s0) +80000134: 0bc0006f j 800001f0 -800001b0 : -800001b0: f14022f3 csrr t0,mhartid -800001b4: ee0298e3 bnez t0,800000a4 -800001b8: 00000297 auipc t0,0x0 -800001bc: 2402a423 sw zero,584(t0) # 80000400 -800001c0: 00000297 auipc t0,0x0 -800001c4: 2402a223 sw zero,580(t0) # 80000404 -800001c8: 00000417 auipc s0,0x0 -800001cc: 1b442403 lw s0,436(s0) # 8000037c -800001d0: 00000297 auipc t0,0x0 -800001d4: 1c02a283 lw t0,448(t0) # 80000390 -800001d8: 00128293 addi t0,t0,1 -800001dc: 00000317 auipc t1,0x0 -800001e0: 1a532a23 sw t0,436(t1) # 80000390 -800001e4: 04829063 bne t0,s0,80000224 -800001e8: 00000317 auipc t1,0x0 -800001ec: 1a032423 sw zero,424(t1) # 80000390 -800001f0: 00000297 auipc t0,0x0 -800001f4: 19c2a283 lw t0,412(t0) # 8000038c -800001f8: 00128293 addi t0,t0,1 -800001fc: 00000317 auipc t1,0x0 -80000200: 18532823 sw t0,400(t1) # 8000038c -80000204: 02829063 bne t0,s0,80000224 -80000208: 00000317 auipc t1,0x0 -8000020c: 18032223 sw zero,388(t1) # 8000038c -80000210: 00000297 auipc t0,0x0 -80000214: 1842a283 lw t0,388(t0) # 80000394 -80000218: 00128293 addi t0,t0,1 -8000021c: 00000317 auipc t1,0x0 -80000220: 16532c23 sw t0,376(t1) # 80000394 +80000138 : +80000138: 00018513 mv a0,gp 
+8000013c: 00118193 addi gp,gp,1 +80000140: 22c000ef jal ra,8000036c +80000144: 00000297 auipc t0,0x0 +80000148: 3002a823 sw zero,784(t0) # 80000454 -80000224 : -80000224: 0130000f fence w,rw -80000228: e7dff06f j 800000a4 +8000014c : +8000014c: 00018513 mv a0,gp +80000150: 00118193 addi gp,gp,1 +80000154: 218000ef jal ra,8000036c +80000158: 00000297 auipc t0,0x0 +8000015c: 2fc2a283 lw t0,764(t0) # 80000454 +80000160: 03200313 li t1,50 +80000164: 1662da63 bge t0,t1,800002d8 +80000168: 00000297 auipc t0,0x0 +8000016c: 2e42a283 lw t0,740(t0) # 8000044c +80000170: 00000317 auipc t1,0x0 +80000174: 2e032303 lw t1,736(t1) # 80000450 +80000178: 06628c63 beq t0,t1,800001f0 +8000017c: f14022f3 csrr t0,mhartid +80000180: 00000317 auipc t1,0x0 +80000184: 2cc32303 lw t1,716(t1) # 8000044c +80000188: 00000417 auipc s0,0x0 +8000018c: 33840413 addi s0,s0,824 # 800004c0 +80000190: 00000497 auipc s1,0x0 +80000194: 33448493 addi s1,s1,820 # 800004c4 +80000198: 02628863 beq t0,t1,800001c8 +8000019c: 00000317 auipc t1,0x0 +800001a0: 2b432303 lw t1,692(t1) # 80000450 +800001a4: 00000417 auipc s0,0x0 +800001a8: 32040413 addi s0,s0,800 # 800004c4 +800001ac: 00000497 auipc s1,0x0 +800001b0: 31448493 addi s1,s1,788 # 800004c0 +800001b4: 00628a63 beq t0,t1,800001c8 -8000022c : -8000022c: 1000006f j 8000032c +800001b8 : +800001b8: 00018513 mv a0,gp +800001bc: 00118193 addi gp,gp,1 +800001c0: 1ac000ef jal ra,8000036c +800001c4: 02c0006f j 800001f0 -80000230 : -80000230: f80002b7 lui t0,0xf8000 -80000234: 00c28293 addi t0,t0,12 # f800000c -80000238: f1402373 csrr t1,mhartid -8000023c: 01031313 slli t1,t1,0x10 -80000240: 006282b3 add t0,t0,t1 -80000244: 00a2a023 sw a0,0(t0) -80000248: 00000e97 auipc t4,0x0 -8000024c: 13ceae83 lw t4,316(t4) # 80000384 -80000250: 00000297 auipc t0,0x0 -80000254: 13028293 addi t0,t0,304 # 80000380 -80000258: 00100313 li t1,1 -8000025c: 0062a2af amoadd.w t0,t1,(t0) -80000260: 00128293 addi t0,t0,1 -80000264: 00000317 auipc t1,0x0 -80000268: 11832303 lw t1,280(t1) # 8000037c -8000026c: 00629c63 bne t0,t1,80000284 -80000270: 001e8293 addi t0,t4,1 -80000274: 00000317 auipc t1,0x0 -80000278: 10032623 sw zero,268(t1) # 80000380 -8000027c: 00000317 auipc t1,0x0 -80000280: 10532423 sw t0,264(t1) # 80000384 +800001c8 : +800001c8: 00000297 auipc t0,0x0 +800001cc: 3782a283 lw t0,888(t0) # 80000540 +800001d0: 000280e7 jalr t0 -80000284 : -80000284: 00000297 auipc t0,0x0 -80000288: 1002a283 lw t0,256(t0) # 80000384 -8000028c: ffd28ce3 beq t0,t4,80000284 -80000290: f80002b7 lui t0,0xf8000 -80000294: 01028293 addi t0,t0,16 # f8000010 -80000298: f1402373 csrr t1,mhartid -8000029c: 01031313 slli t1,t1,0x10 -800002a0: 006282b3 add t0,t0,t1 -800002a4: 00a2a023 sw a0,0(t0) -800002a8: 00008067 ret +800001d4 : +800001d4: 29a00913 li s2,666 +800001d8: 00018513 mv a0,gp +800001dc: 00118193 addi gp,gp,1 +800001e0: 18c000ef jal ra,8000036c +800001e4: 00000297 auipc t0,0x0 +800001e8: 3602a283 lw t0,864(t0) # 80000544 +800001ec: 000280e7 jalr t0 -800002ac : -800002ac: f80002b7 lui t0,0xf8000 -800002b0: 00c28293 addi t0,t0,12 # f800000c -800002b4: f1402373 csrr t1,mhartid -800002b8: 01031313 slli t1,t1,0x10 -800002bc: 006282b3 add t0,t0,t1 -800002c0: 00a2a023 sw a0,0(t0) -800002c4: 00000e97 auipc t4,0x0 -800002c8: 0c0eae83 lw t4,192(t4) # 80000384 -800002cc: 00000297 auipc t0,0x0 -800002d0: 0b428293 addi t0,t0,180 # 80000380 +800001f0 : +800001f0: 0330000f fence rw,rw +800001f4: 00018513 mv a0,gp +800001f8: 00118193 addi gp,gp,1 +800001fc: 170000ef jal ra,8000036c +80000200: f14022f3 csrr t0,mhartid +80000204: 
f40294e3 bnez t0,8000014c -800002d4 : -800002d4: 1002a32f lr.w t1,(t0) -800002d8: 00130313 addi t1,t1,1 -800002dc: 1862a3af sc.w t2,t1,(t0) -800002e0: fe039ae3 bnez t2,800002d4 -800002e4: 00000297 auipc t0,0x0 -800002e8: 0982a283 lw t0,152(t0) # 8000037c -800002ec: 00629c63 bne t0,t1,80000304 -800002f0: 001e8293 addi t0,t4,1 -800002f4: 00000317 auipc t1,0x0 -800002f8: 08032623 sw zero,140(t1) # 80000380 -800002fc: 00000317 auipc t1,0x0 -80000300: 08532423 sw t0,136(t1) # 80000384 +80000208 : +80000208: 00000297 auipc t0,0x0 +8000020c: 2442a283 lw t0,580(t0) # 8000044c +80000210: 00000317 auipc t1,0x0 +80000214: 24032303 lw t1,576(t1) # 80000450 +80000218: 04628263 beq t0,t1,8000025c +8000021c: 00000517 auipc a0,0x0 +80000220: 2e852503 lw a0,744(a0) # 80000504 +80000224: f80002b7 lui t0,0xf8000 +80000228: 01428293 addi t0,t0,20 # f8000014 +8000022c: f1402373 csrr t1,mhartid +80000230: 01031313 slli t1,t1,0x10 +80000234: 006282b3 add t0,t0,t1 +80000238: 00a2a023 sw a0,0(t0) +8000023c: 00000517 auipc a0,0x0 +80000240: 2c452503 lw a0,708(a0) # 80000500 +80000244: f80002b7 lui t0,0xf8000 +80000248: 01428293 addi t0,t0,20 # f8000014 +8000024c: f1402373 csrr t1,mhartid +80000250: 01031313 slli t1,t1,0x10 +80000254: 006282b3 add t0,t0,t1 +80000258: 00a2a023 sw a0,0(t0) -80000304 : -80000304: 00000297 auipc t0,0x0 -80000308: 0802a283 lw t0,128(t0) # 80000384 -8000030c: ffd28ce3 beq t0,t4,80000304 -80000310: f80002b7 lui t0,0xf8000 -80000314: 01028293 addi t0,t0,16 # f8000010 -80000318: f1402373 csrr t1,mhartid -8000031c: 01031313 slli t1,t1,0x10 -80000320: 006282b3 add t0,t0,t1 -80000324: 00a2a023 sw a0,0(t0) -80000328: 00008067 ret +8000025c : +8000025c: f14022f3 csrr t0,mhartid +80000260: ee0296e3 bnez t0,8000014c +80000264: 00000297 auipc t0,0x0 +80000268: 2402ae23 sw zero,604(t0) # 800004c0 +8000026c: 00000297 auipc t0,0x0 +80000270: 2402ac23 sw zero,600(t0) # 800004c4 +80000274: 00000417 auipc s0,0x0 +80000278: 1c842403 lw s0,456(s0) # 8000043c +8000027c: 00000297 auipc t0,0x0 +80000280: 1d42a283 lw t0,468(t0) # 80000450 +80000284: 00128293 addi t0,t0,1 +80000288: 00000317 auipc t1,0x0 +8000028c: 1c532423 sw t0,456(t1) # 80000450 +80000290: 04829063 bne t0,s0,800002d0 +80000294: 00000317 auipc t1,0x0 +80000298: 1a032e23 sw zero,444(t1) # 80000450 +8000029c: 00000297 auipc t0,0x0 +800002a0: 1b02a283 lw t0,432(t0) # 8000044c +800002a4: 00128293 addi t0,t0,1 +800002a8: 00000317 auipc t1,0x0 +800002ac: 1a532223 sw t0,420(t1) # 8000044c +800002b0: 02829063 bne t0,s0,800002d0 +800002b4: 00000317 auipc t1,0x0 +800002b8: 18032c23 sw zero,408(t1) # 8000044c +800002bc: 00000297 auipc t0,0x0 +800002c0: 1982a283 lw t0,408(t0) # 80000454 +800002c4: 00128293 addi t0,t0,1 +800002c8: 00000317 auipc t1,0x0 +800002cc: 18532623 sw t0,396(t1) # 80000454 -8000032c : -8000032c: 00000413 li s0,0 -80000330: f80002b7 lui t0,0xf8000 -80000334: 00828293 addi t0,t0,8 # f8000008 -80000338: f1402373 csrr t1,mhartid -8000033c: 01031313 slli t1,t1,0x10 -80000340: 006282b3 add t0,t0,t1 -80000344: 0082a023 sw s0,0(t0) -80000348: 0240006f j 8000036c +800002d0 : +800002d0: 0130000f fence w,rw +800002d4: e79ff06f j 8000014c -8000034c : -8000034c: 00100413 li s0,1 +800002d8 : +800002d8: 00000417 auipc s0,0x0 +800002dc: 27042403 lw s0,624(s0) # 80000548 +800002e0: 00018513 mv a0,gp +800002e4: 00118193 addi gp,gp,1 +800002e8: 084000ef jal ra,8000036c +800002ec: 000400e7 jalr s0 + +800002f0 : +800002f0: f80002b7 lui t0,0xf8000 +800002f4: 00c28293 addi t0,t0,12 # f800000c +800002f8: f1402373 csrr t1,mhartid +800002fc: 01031313 slli 
t1,t1,0x10 +80000300: 006282b3 add t0,t0,t1 +80000304: 00a2a023 sw a0,0(t0) +80000308: 00000e97 auipc t4,0x0 +8000030c: 13ceae83 lw t4,316(t4) # 80000444 +80000310: 00000297 auipc t0,0x0 +80000314: 13028293 addi t0,t0,304 # 80000440 +80000318: 00100313 li t1,1 +8000031c: 0062a2af amoadd.w t0,t1,(t0) +80000320: 00128293 addi t0,t0,1 +80000324: 00000317 auipc t1,0x0 +80000328: 11832303 lw t1,280(t1) # 8000043c +8000032c: 00629c63 bne t0,t1,80000344 +80000330: 001e8293 addi t0,t4,1 +80000334: 00000317 auipc t1,0x0 +80000338: 10032623 sw zero,268(t1) # 80000440 +8000033c: 00000317 auipc t1,0x0 +80000340: 10532423 sw t0,264(t1) # 80000444 + +80000344 : +80000344: 00000297 auipc t0,0x0 +80000348: 1002a283 lw t0,256(t0) # 80000444 +8000034c: ffd28ce3 beq t0,t4,80000344 80000350: f80002b7 lui t0,0xf8000 -80000354: 00828293 addi t0,t0,8 # f8000008 +80000354: 01028293 addi t0,t0,16 # f8000010 80000358: f1402373 csrr t1,mhartid 8000035c: 01031313 slli t1,t1,0x10 80000360: 006282b3 add t0,t0,t1 -80000364: 0082a023 sw s0,0(t0) -80000368: 0040006f j 8000036c +80000364: 00a2a023 sw a0,0(t0) +80000368: 00008067 ret -8000036c : -8000036c: 0000006f j 8000036c +8000036c : +8000036c: f80002b7 lui t0,0xf8000 +80000370: 00c28293 addi t0,t0,12 # f800000c +80000374: f1402373 csrr t1,mhartid +80000378: 01031313 slli t1,t1,0x10 +8000037c: 006282b3 add t0,t0,t1 +80000380: 00a2a023 sw a0,0(t0) +80000384: 00000e97 auipc t4,0x0 +80000388: 0c0eae83 lw t4,192(t4) # 80000444 +8000038c: 00000297 auipc t0,0x0 +80000390: 0b428293 addi t0,t0,180 # 80000440 -80000370 : -80000370: fff50513 addi a0,a0,-1 -80000374: fe051ee3 bnez a0,80000370 -80000378: 00008067 ret +80000394 : +80000394: 1002a32f lr.w t1,(t0) +80000398: 00130313 addi t1,t1,1 +8000039c: 1862a3af sc.w t2,t1,(t0) +800003a0: fe039ae3 bnez t2,80000394 +800003a4: 00000297 auipc t0,0x0 +800003a8: 0982a283 lw t0,152(t0) # 8000043c +800003ac: 00629c63 bne t0,t1,800003c4 +800003b0: 001e8293 addi t0,t4,1 +800003b4: 00000317 auipc t1,0x0 +800003b8: 08032623 sw zero,140(t1) # 80000440 +800003bc: 00000317 auipc t1,0x0 +800003c0: 08532423 sw t0,136(t1) # 80000444 -8000037c : -8000037c: 0000 unimp +800003c4 : +800003c4: 00000297 auipc t0,0x0 +800003c8: 0802a283 lw t0,128(t0) # 80000444 +800003cc: ffd28ce3 beq t0,t4,800003c4 +800003d0: f80002b7 lui t0,0xf8000 +800003d4: 01028293 addi t0,t0,16 # f8000010 +800003d8: f1402373 csrr t1,mhartid +800003dc: 01031313 slli t1,t1,0x10 +800003e0: 006282b3 add t0,t0,t1 +800003e4: 00a2a023 sw a0,0(t0) +800003e8: 00008067 ret + +800003ec : +800003ec: 00000413 li s0,0 +800003f0: f80002b7 lui t0,0xf8000 +800003f4: 00828293 addi t0,t0,8 # f8000008 +800003f8: f1402373 csrr t1,mhartid +800003fc: 01031313 slli t1,t1,0x10 +80000400: 006282b3 add t0,t0,t1 +80000404: 0082a023 sw s0,0(t0) +80000408: 0240006f j 8000042c + +8000040c : +8000040c: 00100413 li s0,1 +80000410: f80002b7 lui t0,0xf8000 +80000414: 00828293 addi t0,t0,8 # f8000008 +80000418: f1402373 csrr t1,mhartid +8000041c: 01031313 slli t1,t1,0x10 +80000420: 006282b3 add t0,t0,t1 +80000424: 0082a023 sw s0,0(t0) +80000428: 0040006f j 8000042c + +8000042c : +8000042c: 0000006f j 8000042c + +80000430 : +80000430: fff50513 addi a0,a0,-1 +80000434: fe051ee3 bnez a0,80000430 +80000438: 00008067 ret + +8000043c : +8000043c: 0000 unimp ... -80000380 : -80000380: 0000 unimp - ... - -80000384 : -80000384: 0000 unimp - ... - -80000388 : -80000388: 1000 addi s0,sp,32 - ... - -8000038c : -8000038c: 0000 unimp - ... - -80000390 : -80000390: 0000 unimp - ... 
- -80000394 : -80000394: 0000 unimp -80000396: 0000 unimp -80000398: 00000013 nop -8000039c: 00000013 nop -800003a0: 00000013 nop -800003a4: 00000013 nop -800003a8: 00000013 nop -800003ac: 00000013 nop -800003b0: 00000013 nop -800003b4: 00000013 nop -800003b8: 00000013 nop -800003bc: 00000013 nop -800003c0: 00000013 nop -800003c4: 00000013 nop -800003c8: 00000013 nop -800003cc: 00000013 nop -800003d0: 00000013 nop -800003d4: 00000013 nop -800003d8: 00000013 nop -800003dc: 00000013 nop -800003e0: 00000013 nop -800003e4: 00000013 nop -800003e8: 00000013 nop -800003ec: 00000013 nop -800003f0: 00000013 nop -800003f4: 00000013 nop -800003f8: 00000013 nop -800003fc: 00000013 nop - -80000400 : -80000400: 0000 unimp - ... - -80000404 : -80000404: 0000 unimp -80000406: 0000 unimp -80000408: 00000013 nop -8000040c: 00000013 nop -80000410: 00000013 nop -80000414: 00000013 nop -80000418: 00000013 nop -8000041c: 00000013 nop -80000420: 00000013 nop -80000424: 00000013 nop -80000428: 00000013 nop -8000042c: 00000013 nop -80000430: 00000013 nop -80000434: 00000013 nop -80000438: 00000013 nop -8000043c: 00000013 nop - -80000440 : +80000440 : 80000440: 0000 unimp ... -80000444 : +80000444 : +80000444: 0000 unimp + ... + +80000448 : +80000448: 1000 addi s0,sp,32 + ... + +8000044c : +8000044c: 0000 unimp + ... + +80000450 : +80000450: 0000 unimp + ... + +80000454 : +80000454: 0000 unimp +80000456: 0000 unimp +80000458: 00000013 nop +8000045c: 00000013 nop +80000460: 00000013 nop +80000464: 00000013 nop +80000468: 00000013 nop +8000046c: 00000013 nop +80000470: 00000013 nop +80000474: 00000013 nop +80000478: 00000013 nop +8000047c: 00000013 nop +80000480: 00000013 nop +80000484: 00000013 nop +80000488: 00000013 nop +8000048c: 00000013 nop +80000490: 00000013 nop +80000494: 00000013 nop +80000498: 00000013 nop +8000049c: 00000013 nop +800004a0: 00000013 nop +800004a4: 00000013 nop +800004a8: 00000013 nop +800004ac: 00000013 nop +800004b0: 00000013 nop +800004b4: 00000013 nop +800004b8: 00000013 nop +800004bc: 00000013 nop + +800004c0 : +800004c0: 0000 unimp + ... + +800004c4 : +800004c4: 0000 unimp +800004c6: 0000 unimp +800004c8: 00000013 nop +800004cc: 00000013 nop +800004d0: 00000013 nop +800004d4: 00000013 nop +800004d8: 00000013 nop +800004dc: 00000013 nop +800004e0: 00000013 nop +800004e4: 00000013 nop +800004e8: 00000013 nop +800004ec: 00000013 nop +800004f0: 00000013 nop +800004f4: 00000013 nop +800004f8: 00000013 nop +800004fc: 00000013 nop + +80000500 : +80000500: 0000 unimp + ... + +80000504 : +80000504: 0000 unimp +80000506: 0000 unimp +80000508: 00000013 nop +8000050c: 00000013 nop +80000510: 00000013 nop +80000514: 00000013 nop +80000518: 00000013 nop +8000051c: 00000013 nop +80000520: 00000013 nop +80000524: 00000013 nop +80000528: 00000013 nop +8000052c: 00000013 nop +80000530: 00000013 nop +80000534: 00000013 nop +80000538: 00000013 nop +8000053c: 00000013 nop + +80000540 : +80000540: 0000 unimp + ... + +80000544 : +80000544: 0000 unimp + ... + +80000548 : ... 
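[Editor's reading aid, not part of the patch] The consistency variants exercised above reduce to the same store-buffering shaped check, run by two harts with the A and B roles swapped: publish your own value (the 666 constant), order that store against the following load (fence w,r in the first variant, an explicitly ordered lr.w in the second), then sample the other hart's value and record what was seen so hart 0 can report it. The C11 analogue below is purely illustrative; the names (hart_a, a_value, a_saw_b, ...) are invented stand-ins for the consistancy_* labels, and the seq_cst fence is only a loose substitute for the RISC-V orderings under test, not a claim of equivalence.

    #include <stdatomic.h>

    static atomic_int a_value, b_value;   /* one word per hart, 666 once published      */
    static atomic_int a_saw_b, b_saw_a;   /* what each hart sampled on the other side   */

    void hart_a(void) {
        atomic_store_explicit(&a_value, 666, memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);   /* loose stand-in for fence w,r */
        int seen = atomic_load_explicit(&b_value, memory_order_relaxed);
        atomic_store_explicit(&a_saw_b, seen, memory_order_relaxed);
    }

    void hart_b(void) {                              /* mirror image, roles swapped  */
        atomic_store_explicit(&b_value, 666, memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);
        int seen = atomic_load_explicit(&a_value, memory_order_relaxed);
        atomic_store_explicit(&b_saw_a, seen, memory_order_relaxed);
    }

If a hart's store were still sitting in a store buffer when its following load executed, both sides could sample the other hart's initial value; the recorded observations are presumably what the REPORT_CONSISTENCY_VALUES writes are meant to expose to the testbench.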
diff --git a/src/test/cpp/raw/smp/build/smp.bin b/src/test/cpp/raw/smp/build/smp.bin index b391a14e6fda4e289fe59a0e77fca47434fcb461..5ce691833aa5418f5dbaaa0a6dafe4f821e092e2 100755 GIT binary patch literal 1388 zcmcgrL2DC182xr;C#1Br+YXpC*0N^TXf-C$ih>u{H5QM;{sGxNc#P0L5GM(U^dN3A zAc8SfuprIBL$0yjJ$Uq*LvF^Pf~VA(ooQntwjkod^5xCjdGCGSOxA5@&_?iM-Esz$ zV(l&S!D=v|#DF>h2y4X6H(o(P2f%jmIwi2!ht9P+$CZ~wr@L7liv1yTVO-;Jq|pah zp~Qqfl+RF7;1zgvcy;oW+~&2DqvVdjyxtRhhLWVGPN7(j?CEi|TTL->(#xiYf4opp8%e+KN z(23Ss+xmjS)3l~Rcz>Sf?4*U3^%@mMYvq$-t%7%AN!hi z1`@x|iuqE#x+>-xR*S2_10PB5#yQF97ZrCQc<95(tCQmSkkn^adSwK3sA z2HBz?Y*&{At~A{fAzJ|9YuVmX@QBGX%Hfpc(eVR5?WH&$&Be-M)fZbl;4@y5_otP< zJnu;l{q(3;C*A`c+EFjN6L+-<^;w=}=K~dR(H#Ag!k->^meE7Tgaf8LTk%UcAw&*)Mrx@{&D3WSN`#%{3)Ni x%*Q7BXX4%?svC##{2P8p$@jhPG8u|~#vWq~0p;+2y!)-Dep@9P{^e*$`^7D)gA delta 517 zcmZ{hF;Buk6vtn$cSc=6FVcdLRIVl>2&m}XAZ`vMd;!yb08IPxtkAN}9s2btK;8_AyV-C2-G{1k*Z* zpZVoHl|8Ut%vk6LIYs6Hn3J2Jbit&y}dO$F7?Xq;rj4`FM)U!=so(~}>+5dRZ{04oB=B$hKKz*c(W%w7Y3h($R z>DtR6=2qEeEn&57EDY1-L|8D0&kEQ^P31gz6_%FOkeAh8o~y<9hEjd>`d$Z!_C`oW L?h(=4DvAC8dYFFt diff --git a/src/test/cpp/raw/smp/src/crt.S b/src/test/cpp/raw/smp/src/crt.S index 4f984eb..9a72bd7 100644 --- a/src/test/cpp/raw/smp/src/crt.S +++ b/src/test/cpp/raw/smp/src/crt.S @@ -1,4 +1,4 @@ -#define CONSISTENCY_REDO_COUNT 10 +#define CONSISTENCY_REDO_COUNT 50 #define REPORT_OFFSET 0xF8000000 @@ -59,6 +59,51 @@ barrier_amo_test: lw gp, barrier_allocator + +consistancy_test1: + la t0, consistancy_init_load + sw t0, consistancy_init_call, t1 + la t0, consistancy_do_simple_fence + sw t0, consistancy_do_call, t1 + la t0, consistancy_test2 + sw t0, consistancy_done_call, t1 + j consistancy_start + +consistancy_test2: + la t0, consistancy_init_load + sw t0, consistancy_init_call, t1 + la t0, consistancy_do_rl_fence + sw t0, consistancy_do_call, t1 + la t0, success + sw t0, consistancy_done_call, t1 + j consistancy_start + + +consistancy_init_load: + lw s3, (s1) //Help getting the cache loaded for the consistancy check + j consistancy_do_init_done + +consistancy_do_simple_fence: + //Consistancy check : write to read ordering on two thread + sw s2, (s0) + fence w,r + lw s3, (s1) + sw s3, 64(s0) + j consistancy_join + +consistancy_do_rl_fence: + //Consistancy check : write to read ordering on two thread + sw s2, (s0) + lr.w.rl s3, (s1) + sw s3, 64(s0) + j consistancy_join + + +consistancy_start: + mv a0, gp + addi gp, gp, 1 + call barrier_lrsc + sw x0, consistancy_all_tested, t0 consistancy_loop: //Sync mv a0, gp @@ -91,17 +136,17 @@ consistancy_hart_not_involved: j consistancy_join consistancy_do: + lw t0, consistancy_init_call + jalr t0 +consistancy_do_init_done: li s2, 666 mv a0, gp addi gp, gp, 1 - lw s3, (s1) //Help getting the cache loaded for the consistancy check call barrier_lrsc - //Consistancy check : write to read ordering on two thread - sw s2, (s0) - fence w,r - lw s3, (s1) - sw s3, 64(s0) + + lw t0, consistancy_do_call + jalr t0 consistancy_join: fence rw, rw //ensure updated values @@ -144,7 +189,11 @@ consistancy_increment_fence: j consistancy_loop consistancy_passed: - j success + lw s0, consistancy_done_call + mv a0, gp + addi gp, gp, 1 + call barrier_lrsc + jalr s0 @@ -234,4 +283,5 @@ consistancy_a_readed: .word 0 .align 6 //Same cache line consistancy_init_call: .word 0 -consistancy_do_call: .word 0 \ No newline at end of file +consistancy_do_call: .word 0 +consistancy_done_call: .word 0 \ No newline at end of file From 4016b1fc5274b799ca1a5f7be88b7809744396e7 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Tue, 21 Apr 2020 
17:18:08 +0200 Subject: [PATCH 33/91] Add sbt assembly --- project/plugins.sbt | 1 + 1 file changed, 1 insertion(+) diff --git a/project/plugins.sbt b/project/plugins.sbt index e5c4233..60a54de 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -1,2 +1,3 @@ addSbtPlugin("com.typesafe.sbteclipse" % "sbteclipse-plugin" % "5.2.4") +addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.10") From 3fb123a64a77abec5ca8136a31ba562f40a078a3 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Tue, 21 Apr 2020 21:20:54 +0200 Subject: [PATCH 34/91] fix withStall --- src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala index f823f4f..286d503 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -8,7 +8,6 @@ import spinal.lib.bus.bmb.sim.BmbMemoryAgent import spinal.lib.bus.bmb.{Bmb, BmbArbiter, BmbDecoder, BmbExclusiveMonitor, BmbInvalidateMonitor, BmbParameter} import spinal.lib.com.jtag.Jtag import spinal.lib.com.jtag.sim.JtagTcp -import vexriscv.demo.smp.VexRiscvSmpClusterTest.{cpuCount, withStall} import vexriscv.ip.{DataCacheAck, DataCacheConfig, DataCacheMemBus, InstructionCacheConfig} import vexriscv.plugin.{BranchPlugin, CsrPlugin, CsrPluginConfig, DBusCachedPlugin, DBusSimplePlugin, DebugPlugin, DecoderSimplePlugin, FullBarrelShifterPlugin, HazardSimplePlugin, IBusCachedPlugin, IBusSimplePlugin, IntAluPlugin, MmuPlugin, MmuPortConfig, MulDivIterativePlugin, MulPlugin, RegFilePlugin, STATIC, SrcPlugin, YamlPlugin} import vexriscv.{Riscv, VexRiscv, VexRiscvConfig, plugin} @@ -301,7 +300,7 @@ object VexRiscvSmpClusterTestInfrastructure{ val CLINT_CMP_ADDR = CLINT_ADDR+0x4000 val CLINT_TIME_ADDR = CLINT_ADDR+0xBFF8 - def ram(dut : VexRiscvSmpCluster) = { + def ram(dut : VexRiscvSmpCluster, withStall : Boolean) = { import spinal.core.sim._ val cpuCount = dut.cpus.size val ram = new BmbMemoryAgent(0x100000000l){ @@ -479,7 +478,7 @@ object VexRiscvSmpClusterTest extends App{ SimTimeout(100000000l*10*cpuCount) dut.clockDomain.forkSimSpeedPrinter(1.0) VexRiscvSmpClusterTestInfrastructure.init(dut) - val ram = VexRiscvSmpClusterTestInfrastructure.ram(dut) + val ram = VexRiscvSmpClusterTestInfrastructure.ram(dut, withStall) ram.memory.loadBin(0x80000000l, "src/test/cpp/raw/smp/build/smp.bin") periodicaly(20000*10){ assert(ram.reportWatchdog != 0) @@ -505,7 +504,7 @@ object VexRiscvSmpClusterOpenSbi extends App{ simConfig.workspaceName("rawr_4c").compile(VexRiscvSmpClusterGen.vexRiscvCluster(cpuCount)).doSimUntilVoid(seed = 42){dut => // dut.clockDomain.forkSimSpeedPrinter(1.0) VexRiscvSmpClusterTestInfrastructure.init(dut) - val ram = VexRiscvSmpClusterTestInfrastructure.ram(dut) + val ram = VexRiscvSmpClusterTestInfrastructure.ram(dut, withStall) // ram.memory.loadBin(0x80000000l, "../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_payload.bin") ram.memory.loadBin(0x80000000l, "../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_jump.bin") ram.memory.loadBin(0xC0000000l, "../buildroot/output/images/Image") From 0c59dd9ed3b967cd3b0f38cdae4ffc083d991dcf Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Mon, 27 Apr 2020 17:37:15 +0200 Subject: [PATCH 35/91] SMP fence now ensure ordering for all kinds of memory transfers --- src/main/scala/vexriscv/ip/DataCache.scala | 2 +- 
src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/scala/vexriscv/ip/DataCache.scala b/src/main/scala/vexriscv/ip/DataCache.scala index 3464165..4d2b2a2 100644 --- a/src/main/scala/vexriscv/ip/DataCache.scala +++ b/src/main/scala/vexriscv/ip/DataCache.scala @@ -567,7 +567,7 @@ class DataCache(val p : DataCacheConfig) extends Component{ val consistancyCheck = (withInvalidate || withWriteResponse) generate new Area { val fenceConsistent = (if(withInvalidate) sync.fenceConsistent else pending.done) && !io.cpu.writeBack.fenceValid && !io.cpu.memory.fenceValid //Pessimistic fence tracking val totalyConsistent = (if(withInvalidate) sync.totalyConsistent else pending.done) && !(io.cpu.memory.isValid && io.cpu.memory.isWrite) && !(io.cpu.writeBack.isValid && io.cpu.memory.isWrite) - when(io.cpu.execute.isValid && (!io.cpu.execute.args.wr || isAmo)){ + when(io.cpu.execute.isValid /*&& (!io.cpu.execute.args.wr || isAmo)*/){ when(!fenceConsistent || io.cpu.execute.totalyConsistent && !totalyConsistent){ io.cpu.execute.haltIt := True } diff --git a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala index 8308bc0..12f38f3 100644 --- a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala +++ b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala @@ -236,7 +236,7 @@ class DBusCachedPlugin(val config : DataCacheConfig, if(withLrSc) insert(MEMORY_FENCE_BACK) setWhen(input(MEMORY_LRSC)) if(withAmo) insert(MEMORY_FENCE_BACK) setWhen(input(MEMORY_AMO)) } - when(input(INSTRUCTION)(25)) { //RL but a bit pessimistic as it could be MEMORY_FENCE_BACK when the memory op isn't a read + when(input(INSTRUCTION)(25)) { //RL if(withLrSc) insert(MEMORY_FENCE_FRONT) setWhen(input(MEMORY_LRSC)) if(withAmo) insert(MEMORY_FENCE_FRONT) setWhen(input(MEMORY_AMO)) } From 5fd0b220cdd8ed15a6bf0d66b7b77ea41f0685aa Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Mon, 27 Apr 2020 17:37:30 +0200 Subject: [PATCH 36/91] CsrPlugin add openSbi config --- .../scala/vexriscv/plugin/CsrPlugin.scala | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/src/main/scala/vexriscv/plugin/CsrPlugin.scala b/src/main/scala/vexriscv/plugin/CsrPlugin.scala index fce4c15..bb56c3e 100644 --- a/src/main/scala/vexriscv/plugin/CsrPlugin.scala +++ b/src/main/scala/vexriscv/plugin/CsrPlugin.scala @@ -83,6 +83,46 @@ object CsrPluginConfig{ def all : CsrPluginConfig = all(0x00000020l) def small : CsrPluginConfig = small(0x00000020l) def smallest : CsrPluginConfig = smallest(0x00000020l) + + def openSbi(hartId : Int, misa : Int) = CsrPluginConfig( + catchIllegalAccess = true, + mvendorid = 0, + marchid = 0, + mimpid = 0, + mhartid = hartId, + misaExtensionsInit = misa, + misaAccess = CsrAccess.READ_ONLY, + mtvecAccess = CsrAccess.READ_WRITE, //Could have been WRITE_ONLY :( + mtvecInit = null, + mepcAccess = CsrAccess.READ_WRITE, + mscratchGen = true, + mcauseAccess = CsrAccess.READ_ONLY, + mbadaddrAccess = CsrAccess.READ_ONLY, + mcycleAccess = CsrAccess.NONE, + minstretAccess = CsrAccess.NONE, + ucycleAccess = CsrAccess.NONE, + wfiGenAsWait = true, + ecallGen = true, + xtvecModeGen = false, + noCsrAlu = false, + wfiGenAsNop = false, + ebreakGen = false, //TODO + userGen = true, + supervisorGen = true, + sscratchGen = true, + stvecAccess = CsrAccess.READ_WRITE, + sepcAccess = CsrAccess.READ_WRITE, + scauseAccess = CsrAccess.READ_WRITE, + sbadaddrAccess = CsrAccess.READ_WRITE, + scycleAccess = CsrAccess.NONE, 
+ sinstretAccess = CsrAccess.NONE, + satpAccess = CsrAccess.NONE, + medelegAccess = CsrAccess.READ_WRITE, //Could have been WRITE_ONLY :( + midelegAccess = CsrAccess.READ_WRITE, //Could have been WRITE_ONLY :( + pipelineCsrRead = false, + deterministicInteruptionEntry = false + ) + def linuxMinimal(mtVecInit : BigInt) = CsrPluginConfig( catchIllegalAccess = true, mvendorid = 1, From 3ba509931c43902bc823e1cd59b30a028cb54f47 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Mon, 27 Apr 2020 17:38:06 +0200 Subject: [PATCH 37/91] Add VexRiscvSmpLitexCluster with the required pipelining to get proper FMax --- .../demo/smp/VexRiscvSmpCluster.scala | 65 +++-- .../demo/smp/VexRiscvSmpLitexCluster.scala | 228 ++++++++++++++++++ 2 files changed, 255 insertions(+), 38 deletions(-) create mode 100644 src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala index 286d503..3c85731 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -27,7 +27,7 @@ case class VexRiscvSmpCluster(p : VexRiscvSmpClusterParameter, val dMemParameter = BmbInvalidateMonitor.outputParameter(invalidateMonitorParameter) val iBusParameter = p.cpuConfigs.head.plugins.find(_.isInstanceOf[IBusCachedPlugin]).get.asInstanceOf[IBusCachedPlugin].config.getBmbParameter() - val iBusArbiterParameter = iBusParameter.copy(sourceWidth = log2Up(p.cpuConfigs.size)) + val iBusArbiterParameter = iBusParameter//.copy(sourceWidth = log2Up(p.cpuConfigs.size)) val iMemParameter = iBusArbiterParameter val io = new Bundle { @@ -72,20 +72,19 @@ case class VexRiscvSmpCluster(p : VexRiscvSmpClusterParameter, val dBusArbiter = BmbArbiter( p = dBusArbiterParameter, portCount = cpus.size, - pendingRspMax = 64, lowerFirstPriority = false, inputsWithInv = cpus.map(_ => true), inputsWithSync = cpus.map(_ => true), pendingInvMax = 16 ) - (dBusArbiter.io.inputs, cpus).zipped.foreach(_ << _.dBus) + (dBusArbiter.io.inputs, cpus).zipped.foreach(_ << _.dBus.pipelined(invValid = true, ackValid = true, syncValid = true)) val exclusiveMonitor = BmbExclusiveMonitor( inputParameter = exclusiveMonitorParameter, pendingWriteMax = 64 ) - exclusiveMonitor.io.input << dBusArbiter.io.output + exclusiveMonitor.io.input << dBusArbiter.io.output.pipelined(cmdValid = true, cmdReady = true, rspValid = true) val invalidateMonitor = BmbInvalidateMonitor( inputParameter = invalidateMonitorParameter, @@ -113,11 +112,13 @@ case class VexRiscvSmpCluster(p : VexRiscvSmpClusterParameter, object VexRiscvSmpClusterGen { - def vexRiscvConfig(id : Int) = { + def vexRiscvConfig(hartId : Int, + ioRange : UInt => Bool = (x => x(31 downto 28) === 0xF), + resetVector : Long = 0x80000000l) = { val config = VexRiscvConfig( plugins = List( new MmuPlugin( - ioRange = x => x(31 downto 28) === 0xF + ioRange = ioRange ), //Uncomment the whole IBusSimplePlugin and comment IBusCachedPlugin if you want uncached iBus config // new IBusSimplePlugin( @@ -137,10 +138,11 @@ object VexRiscvSmpClusterGen { //Uncomment the whole IBusCachedPlugin and comment IBusSimplePlugin if you want cached iBus config new IBusCachedPlugin( - resetVector = 0x80000000l, + resetVector = resetVector, compressedGen = false, prediction = STATIC, injectorStage = false, + relaxedPcCalculation = true, config = InstructionCacheConfig( cacheSize = 4096*1, bytePerLine = 32, @@ -151,7 +153,7 @@ object VexRiscvSmpClusterGen { 
catchIllegalAccess = true, catchAccessFault = true, asyncTagMemory = false, - twoCycleRam = false, + twoCycleRam = true, twoCycleCache = true // ) ), @@ -173,6 +175,7 @@ object VexRiscvSmpClusterGen { dBusCmdMasterPipe = true, dBusCmdSlavePipe = true, dBusRspSlavePipe = true, + relaxedMemoryTranslationRegister = true, config = new DataCacheConfig( cacheSize = 4096*1, bytePerLine = 32, @@ -204,8 +207,9 @@ object VexRiscvSmpClusterGen { catchIllegalInstruction = true ), new RegFilePlugin( - regFileReadyKind = plugin.SYNC, - zeroBoot = true + regFileReadyKind = plugin.ASYNC, + zeroBoot = true, + x0Init = false ), new IntAluPlugin, new SrcPlugin( @@ -232,39 +236,17 @@ object VexRiscvSmpClusterGen { divUnrollFactor = 1 ), // new DivPlugin, - new CsrPlugin(CsrPluginConfig.all2(0x80000020l).copy(ebreakGen = false, mhartid = id, misaExtensionsInit = Riscv.misaToInt("imas"))), - // new CsrPlugin(//CsrPluginConfig.all2(0x80000020l).copy(ebreakGen = true)/* - // CsrPluginConfig( - // catchIllegalAccess = false, - // mvendorid = null, - // marchid = null, - // mimpid = null, - // mhartid = null, - // misaExtensionsInit = 0, - // misaAccess = CsrAccess.READ_ONLY, - // mtvecAccess = CsrAccess.WRITE_ONLY, - // mtvecInit = 0x80000020l, - // mepcAccess = CsrAccess.READ_WRITE, - // mscratchGen = true, - // mcauseAccess = CsrAccess.READ_ONLY, - // mbadaddrAccess = CsrAccess.READ_ONLY, - // mcycleAccess = CsrAccess.NONE, - // minstretAccess = CsrAccess.NONE, - // ecallGen = true, - // ebreakGen = true, - // wfiGenAsWait = false, - // wfiGenAsNop = true, - // ucycleAccess = CsrAccess.NONE - // )), + new CsrPlugin(CsrPluginConfig.openSbi(hartId = hartId, misa = Riscv.misaToInt("imas"))), + new BranchPlugin( earlyBranch = false, catchAddressMisaligned = true, fenceiGenAsAJump = false ), - new YamlPlugin(s"cpu$id.yaml") + new YamlPlugin(s"cpu$hartId.yaml") ) ) - if(id == 0) config.plugins += new DebugPlugin(null) + if(hartId == 0) config.plugins += new DebugPlugin(null) config } def vexRiscvCluster(cpuCount : Int) = VexRiscvSmpCluster( @@ -441,8 +423,8 @@ object VexRiscvSmpClusterTestInfrastructure{ val value = (dut.io.softwareInterrupts.toLong & ~mask) | (if(data == 1) mask else 0) dut.io.softwareInterrupts #= value } - onRead(CLINT_CMP_ADDR + hartId*8)(clint.cmp(hartId).toInt) - onRead(CLINT_CMP_ADDR + hartId*8+4)((clint.cmp(hartId) >> 32).toInt) +// onRead(CLINT_CMP_ADDR + hartId*8)(clint.cmp(hartId).toInt) +// onRead(CLINT_CMP_ADDR + hartId*8+4)((clint.cmp(hartId) >> 32).toInt) onWrite(CLINT_CMP_ADDR + hartId*8){data => clint.cmp(hartId) = (clint.cmp(hartId) & 0xFFFFFFFF00000000l) | data} onWrite(CLINT_CMP_ADDR + hartId*8+4){data => clint.cmp(hartId) = (clint.cmp(hartId) & 0x00000000FFFFFFFFl) | (data.toLong << 32)} } @@ -490,6 +472,13 @@ object VexRiscvSmpClusterTest extends App{ // echo "echo 10000 | dhrystone >> log" > test // time sh test & // top -b -n 1 + +// TODO +// litex cluster should use out of order decoder +// MultiChannelFifo.toStream arbitration +// BmbDecoderOutOfOrder arbitration +// DataCache to bmb invalidation that are more than single line +// update fence w to w object VexRiscvSmpClusterOpenSbi extends App{ import spinal.core.sim._ diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala new file mode 100644 index 0000000..244c3a9 --- /dev/null +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala @@ -0,0 +1,228 @@ +package vexriscv.demo.smp + +import spinal.core._ +import 
spinal.lib.bus.bmb._ +import spinal.lib.bus.wishbone.{Wishbone, WishboneConfig, WishboneSlaveFactory} +import spinal.lib.com.jtag.Jtag +import spinal.lib._ +import spinal.lib.bus.misc.{AddressMapping, DefaultMapping, SizeMapping} +import spinal.lib.misc.Clint +import vexriscv.demo.smp.VexRiscvSmpClusterGen.vexRiscvConfig +import vexriscv.{VexRiscv, VexRiscvConfig} +import vexriscv.plugin.{CsrPlugin, DBusCachedPlugin, DebugPlugin, IBusCachedPlugin} + +case class LiteDramNativeParameter(addressWidth : Int, dataWidth : Int) + +case class LiteDramNativeCmd(p : LiteDramNativeParameter) extends Bundle{ + val we = Bool() + val addr = UInt(p.addressWidth bits) +} + +case class LiteDramNativeWData(p : LiteDramNativeParameter) extends Bundle{ + val data = Bits(p.dataWidth bits) + val we = Bits(p.dataWidth/8 bits) +} + +case class LiteDramNativeRData(p : LiteDramNativeParameter) extends Bundle{ + val data = Bits(p.dataWidth bits) +} + + +case class LiteDramNative(p : LiteDramNativeParameter) extends Bundle with IMasterSlave { + val cmd = Stream(LiteDramNativeCmd(p)) + val wdata = Stream(LiteDramNativeWData(p)) + val rdata = Stream(LiteDramNativeRData(p)) + override def asMaster(): Unit = { + master(cmd, wdata) + slave(rdata) + } + + def fromBmb(bmb : Bmb): Unit = new Area{ + val resized = bmb.resize(p.dataWidth) + val unburstified = resized.unburstify() + case class Context() extends Bundle { + val context = Bits(unburstified.p.contextWidth bits) + val source = UInt(unburstified.p.sourceWidth bits) + val isWrite = Bool() + } + val (queueFork, cmdFork, dataFork) = StreamFork3(unburstified.cmd) + cmd.arbitrationFrom(cmdFork) + cmd.addr := (cmdFork.address >> log2Up(bmb.p.byteCount)).resized + cmd.we := cmdFork.isWrite + + if(bmb.p.canWrite) { + wdata.arbitrationFrom(dataFork.throwWhen(dataFork.isRead)) + wdata.data := cmdFork.data + wdata.we := cmdFork.mask + } else { + dataFork.ready := True + wdata.valid := False + wdata.data.assignDontCare() + wdata.we.assignDontCare() + } + + val cmdContext = Stream(Context()) + cmdContext.arbitrationFrom(queueFork) + cmdContext.context := unburstified.cmd.context + cmdContext.source := unburstified.cmd.source + cmdContext.isWrite := unburstified.cmd.isWrite + + val rspContext = cmdContext.queue(64) + + rdata.ready := unburstified.rsp.fire && !rspContext.isWrite + rspContext.ready := unburstified.rsp.fire + unburstified.rsp.valid := rspContext.valid && (rspContext.isWrite || rdata.valid) + unburstified.rsp.setSuccess() + unburstified.rsp.last := True + unburstified.rsp.source := rspContext.source + unburstified.rsp.context := rspContext.context + unburstified.rsp.data := rdata.data + } +} + +case class VexRiscvLitexSmpClusterParameter( cluster : VexRiscvSmpClusterParameter, + liteDram : LiteDramNativeParameter, + liteDramMapping : AddressMapping) + +case class VexRiscvLitexSmpCluster(p : VexRiscvLitexSmpClusterParameter, + debugClockDomain : ClockDomain) extends Component{ + + val peripheralWishboneConfig = WishboneConfig( + addressWidth = 30, + dataWidth = 32, + selWidth = 4, + useERR = true, + useBTE = true, + useCTI = true + ) + + val io = new Bundle { + val dMem = master(LiteDramNative(p.liteDram)) + val iMem = master(LiteDramNative(p.liteDram)) + val peripheral = master(Wishbone(peripheralWishboneConfig)) + val clint = slave(Wishbone(Clint.getWisboneConfig())) + val externalInterrupts = in Bits(p.cluster.cpuConfigs.size bits) + val externalSupervisorInterrupts = in Bits(p.cluster.cpuConfigs.size bits) + val jtag = slave(Jtag()) + val debugReset = out Bool() + 
} + val cpuCount = p.cluster.cpuConfigs.size + val clint = Clint(cpuCount) + clint.driveFrom(WishboneSlaveFactory(io.clint)) + + val cluster = VexRiscvSmpCluster(p.cluster, debugClockDomain) + cluster.io.externalInterrupts <> io.externalInterrupts + cluster.io.externalSupervisorInterrupts <> io.externalSupervisorInterrupts + cluster.io.jtag <> io.jtag + cluster.io.debugReset <> io.debugReset + cluster.io.timerInterrupts <> B(clint.harts.map(_.timerInterrupt)) + cluster.io.softwareInterrupts <> B(clint.harts.map(_.softwareInterrupt)) + + + val dBusDecoder = BmbDecoderOutOfOrder( + p = cluster.io.dMem.p, + mappings = Seq(DefaultMapping, p.liteDramMapping), + capabilities = Seq(cluster.io.dMem.p, cluster.io.dMem.p), + pendingRspTransactionMax = 32 + ) + dBusDecoder.io.input << cluster.io.dMem.pipelined(cmdValid = true, cmdReady = true, rspValid = true) + io.dMem.fromBmb(dBusDecoder.io.outputs(1)) + + val iBusArbiterParameter = cluster.iBusParameter.copy(sourceWidth = log2Up(cpuCount)) + val iBusArbiter = BmbArbiter( + p = iBusArbiterParameter, + portCount = cpuCount, + lowerFirstPriority = false + ) + + (iBusArbiter.io.inputs, cluster.io.iMems).zipped.foreach(_ << _.pipelined(cmdHalfRate = true, rspValid = true)) + + val iBusDecoder = BmbDecoder( + p = iBusArbiter.io.output.p, + mappings = Seq(DefaultMapping, p.liteDramMapping), + capabilities = Seq(iBusArbiterParameter, iBusArbiterParameter), + pendingMax = 15 + ) + iBusDecoder.io.input << iBusArbiter.io.output + io.iMem.fromBmb(iBusDecoder.io.outputs(1)) + + val peripheralArbiter = BmbArbiter( + p = dBusDecoder.io.outputs(0).p.copy(sourceWidth = dBusDecoder.io.outputs(0).p.sourceWidth + 1), + portCount = 2, + lowerFirstPriority = true + ) + peripheralArbiter.io.inputs(0) << iBusDecoder.io.outputs(0).resize(dataWidth = 32).pipelined(cmdHalfRate = true, rspValid = true) + peripheralArbiter.io.inputs(1) << dBusDecoder.io.outputs(0).resize(dataWidth = 32).pipelined(cmdHalfRate = true, rspValid = true) + + val peripheralWishbone = peripheralArbiter.io.output.toWishbone() + io.peripheral << peripheralWishbone +} + + +object VexRiscvLitexSmpClusterOpenSbi extends App{ + import spinal.core.sim._ + + val simConfig = SimConfig + simConfig.withWave + simConfig.allOptimisation + simConfig.addSimulatorFlag("--threads 1") + + val cpuCount = 4 + val withStall = false + + def parameter = VexRiscvLitexSmpClusterParameter( + cluster = VexRiscvSmpClusterParameter( + cpuConfigs = List.tabulate(cpuCount) { hartId => + vexRiscvConfig( + hartId = hartId, + ioRange = address => address.msb, + resetVector = 0 + ) + } + ), + liteDram = LiteDramNativeParameter(addressWidth = 32, dataWidth = 32), + liteDramMapping = SizeMapping(0x40000000l, 0x40000000l) + ) + + def dutGen = VexRiscvLitexSmpCluster( + p = parameter, + debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")) + ) + simConfig.compile(dutGen).doSimUntilVoid(seed = 42){dut => + // dut.clockDomain.forkSimSpeedPrinter(1.0) +// VexRiscvSmpClusterTestInfrastructure.init(dut) +// val ram = VexRiscvSmpClusterTestInfrastructure.ram(dut, withStall) + // ram.memory.loadBin(0x80000000l, "../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_payload.bin") +// ram.memory.loadBin(0x80000000l, "../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_jump.bin") +// ram.memory.loadBin(0xC0000000l, "../buildroot/output/images/Image") +// ram.memory.loadBin(0xC1000000l, "../buildroot/output/images/dtb") +// ram.memory.loadBin(0xC2000000l, "../buildroot/output/images/rootfs.cpio") + + 
// fork{ + // disableSimWave() + // val atMs = 130 + // val durationMs = 15 + // sleep(atMs*1000000) + // enableSimWave() + // println("** enableSimWave **") + // sleep(durationMs*1000000) + // println("** disableSimWave **") + // while(true) { + // disableSimWave() + // sleep(100000 * 10) + // enableSimWave() + // sleep( 100 * 10) + // } + //// simSuccess() + // } + + fork{ + while(true) { + disableSimWave() + sleep(100000 * 10) + enableSimWave() + sleep( 100 * 10) + } + } + } + } \ No newline at end of file From 4a49b2363655d54d69b8f88baec8b80d3792a689 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Tue, 28 Apr 2020 14:38:27 +0200 Subject: [PATCH 38/91] Fix regression --- src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala | 1 + src/test/cpp/regression/main.cpp | 2 ++ 2 files changed, 3 insertions(+) diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala index 3c85731..f4aa237 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -479,6 +479,7 @@ object VexRiscvSmpClusterTest extends App{ // BmbDecoderOutOfOrder arbitration // DataCache to bmb invalidation that are more than single line // update fence w to w +// DBusCachedPlugin dBusAccess execute.isValid := True is induce a longe combinatorial path to check conditions, D$ execute valid => execute haltIt object VexRiscvSmpClusterOpenSbi extends App{ import spinal.core.sim._ diff --git a/src/test/cpp/regression/main.cpp b/src/test/cpp/regression/main.cpp index 67d7a0a..82e0671 100644 --- a/src/test/cpp/regression/main.cpp +++ b/src/test/cpp/regression/main.cpp @@ -2364,9 +2364,11 @@ public: virtual void onReset(){ top->dBus_cmd_ready = 1; top->dBus_rsp_valid = 0; + #ifdef DBUS_INVALIDATE top->dBus_inv_valid = 0; top->dBus_ack_ready = 0; top->dBus_sync_valid = 0; + #endif } virtual void preCycle(){ From 03a044577527007910a829d7ab7ea9114bdac1b5 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Tue, 28 Apr 2020 15:50:20 +0200 Subject: [PATCH 39/91] Fix SMP for configuration without writeback stage. 
Include SMP core into the single core tests regressions --- src/main/scala/vexriscv/ip/DataCache.scala | 4 ++-- .../scala/vexriscv/TestIndividualFeatures.scala | 14 +++++++++----- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/main/scala/vexriscv/ip/DataCache.scala b/src/main/scala/vexriscv/ip/DataCache.scala index 4d2b2a2..de29f49 100644 --- a/src/main/scala/vexriscv/ip/DataCache.scala +++ b/src/main/scala/vexriscv/ip/DataCache.scala @@ -565,8 +565,8 @@ class DataCache(val p : DataCacheConfig) extends Component{ //Ensure write to read consistency val consistancyCheck = (withInvalidate || withWriteResponse) generate new Area { - val fenceConsistent = (if(withInvalidate) sync.fenceConsistent else pending.done) && !io.cpu.writeBack.fenceValid && !io.cpu.memory.fenceValid //Pessimistic fence tracking - val totalyConsistent = (if(withInvalidate) sync.totalyConsistent else pending.done) && !(io.cpu.memory.isValid && io.cpu.memory.isWrite) && !(io.cpu.writeBack.isValid && io.cpu.memory.isWrite) + val fenceConsistent = (if(withInvalidate) sync.fenceConsistent else pending.done) && !io.cpu.writeBack.fenceValid && (if(mergeExecuteMemory) True else !io.cpu.memory.fenceValid) //Pessimistic fence tracking + val totalyConsistent = (if(withInvalidate) sync.totalyConsistent else pending.done) && (if(mergeExecuteMemory) True else !(io.cpu.memory.isValid && io.cpu.memory.isWrite)) && !(io.cpu.writeBack.isValid && io.cpu.memory.isWrite) when(io.cpu.execute.isValid /*&& (!io.cpu.execute.args.wr || isAmo)*/){ when(!fenceConsistent || io.cpu.execute.totalyConsistent && !totalyConsistent){ io.cpu.execute.haltIt := True diff --git a/src/test/scala/vexriscv/TestIndividualFeatures.scala b/src/test/scala/vexriscv/TestIndividualFeatures.scala index d292203..f23299a 100644 --- a/src/test/scala/vexriscv/TestIndividualFeatures.scala +++ b/src/test/scala/vexriscv/TestIndividualFeatures.scala @@ -427,8 +427,10 @@ class DBusDimension extends VexRiscvDimension("DBus") { var cacheSize = 0 var wayCount = 0 val withLrSc = catchAll - val withAmo = catchAll && r.nextBoolean() - val dBusRspSlavePipe, relaxedMemoryTranslationRegister = r.nextBoolean() + val withSmp = withLrSc && r.nextBoolean() + val withAmo = catchAll && r.nextBoolean() || withSmp + val dBusRspSlavePipe = r.nextBoolean() || withSmp + val relaxedMemoryTranslationRegister = r.nextBoolean() val earlyWaysHits = r.nextBoolean() && !noWriteBack val dBusCmdMasterPipe, dBusCmdSlavePipe = false //As it create test bench issues @@ -436,8 +438,8 @@ class DBusDimension extends VexRiscvDimension("DBus") { cacheSize = 512 << r.nextInt(5) wayCount = 1 << r.nextInt(3) }while(cacheSize/wayCount < 512 || (catchAll && cacheSize/wayCount > 4096)) - new VexRiscvPosition("Cached" + "S" + cacheSize + "W" + wayCount + "BPL" + bytePerLine + (if(dBusCmdMasterPipe) "Cmp " else "") + (if(dBusCmdSlavePipe) "Csp " else "") + (if(dBusRspSlavePipe) "Rsp " else "") + (if(relaxedMemoryTranslationRegister) "Rmtr " else "") + (if(earlyWaysHits) "Ewh " else "") + (if(withAmo) "Amo " else "")) { - override def testParam = "DBUS=CACHED " + (if(withLrSc) "LRSC=yes " else "") + (if(withAmo) "AMO=yes " else "") + new VexRiscvPosition("Cached" + "S" + cacheSize + "W" + wayCount + "BPL" + bytePerLine + (if(dBusCmdMasterPipe) "Cmp " else "") + (if(dBusCmdSlavePipe) "Csp " else "") + (if(dBusRspSlavePipe) "Rsp " else "") + (if(relaxedMemoryTranslationRegister) "Rmtr " else "") + (if(earlyWaysHits) "Ewh " else "") + (if(withAmo) "Amo " else "") + (if(withSmp) "Smp " else "")) { + override 
def testParam = "DBUS=CACHED " + (if(withLrSc) "LRSC=yes " else "") + (if(withAmo) "AMO=yes " else "") + (if(withSmp) "DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes " else "") override def applyOn(config: VexRiscvConfig): Unit = { config.plugins += new DBusCachedPlugin( @@ -453,7 +455,9 @@ class DBusDimension extends VexRiscvDimension("DBus") { catchUnaligned = catchAll, withLrSc = withLrSc, withAmo = withAmo, - earlyWaysHits = earlyWaysHits + earlyWaysHits = earlyWaysHits, + withExclusive = withSmp, + withInvalidate = withSmp ), dBusCmdMasterPipe = dBusCmdMasterPipe, dBusCmdSlavePipe = dBusCmdSlavePipe, From 23b8c40cab6b41bb3f9c3d2c0a6ca88f185443b1 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Tue, 28 Apr 2020 16:19:00 +0200 Subject: [PATCH 40/91] update travis verilator --- scripts/regression/verilator.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/regression/verilator.mk b/scripts/regression/verilator.mk index b13ca4c..9472858 100644 --- a/scripts/regression/verilator.mk +++ b/scripts/regression/verilator.mk @@ -3,7 +3,7 @@ verilator/configure: rm -rf verilator* - wget https://www.veripool.org/ftp/verilator-4.012.tgz + wget https://www.veripool.org/ftp/verilator-4.032.tgz tar xvzf verilator*.t*gz mv verilator-4.012 verilator From eee9927baf875fd5ab270542a7188442fd559ade Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Tue, 28 Apr 2020 22:10:56 +0200 Subject: [PATCH 41/91] IBusCachedPlugin now support memory data width multiple of 32 --- src/main/scala/vexriscv/ip/InstructionCache.scala | 9 ++++----- src/test/cpp/regression/main.cpp | 11 ++++++++--- src/test/cpp/regression/makefile | 4 ++++ src/test/scala/vexriscv/TestIndividualFeatures.scala | 9 +++++---- 4 files changed, 21 insertions(+), 12 deletions(-) diff --git a/src/main/scala/vexriscv/ip/InstructionCache.scala b/src/main/scala/vexriscv/ip/InstructionCache.scala index 09b1a8a..469377c 100644 --- a/src/main/scala/vexriscv/ip/InstructionCache.scala +++ b/src/main/scala/vexriscv/ip/InstructionCache.scala @@ -70,7 +70,7 @@ case class InstructionCacheConfig( cacheSize : Int, def getBmbParameter() = BmbParameter( addressWidth = 32, - dataWidth = 32, + dataWidth = memDataWidth, lengthWidth = log2Up(this.bytePerLine), sourceWidth = 0, contextWidth = 0, @@ -278,7 +278,6 @@ case class InstructionCacheFlushBus() extends Bundle with IMasterSlave{ class InstructionCache(p : InstructionCacheConfig) extends Component{ import p._ - assert(cpuDataWidth == memDataWidth, "Need testing") val io = new Bundle{ val flush = in Bool() val cpu = slave(InstructionCacheCpuBus(p)) @@ -287,7 +286,7 @@ class InstructionCache(p : InstructionCacheConfig) extends Component{ val lineWidth = bytePerLine*8 val lineCount = cacheSize/bytePerLine - val wordWidth = Math.max(memDataWidth,32) + val wordWidth = cpuDataWidth val wordWidthLog2 = log2Up(wordWidth) val wordPerLine = lineWidth/wordWidth val memWordPerLine = lineWidth/memDataWidth @@ -295,7 +294,7 @@ class InstructionCache(p : InstructionCacheConfig) extends Component{ val bytePerMemWord = memDataWidth/8 val wayLineCount = lineCount/wayCount val wayLineLog2 = log2Up(wayLineCount) - val wayWordCount = wayLineCount * wordPerLine + val wayMemWordCount = wayLineCount * memWordPerLine val tagRange = addressWidth-1 downto log2Up(wayLineCount*bytePerLine) val lineRange = tagRange.low-1 downto log2Up(bytePerLine) @@ -314,7 +313,7 @@ class InstructionCache(p : InstructionCacheConfig) extends Component{ val ways = Seq.fill(wayCount)(new Area{ val tags = Mem(LineTag(),wayLineCount) - val datas = 
Mem(Bits(memDataWidth bits),wayWordCount) + val datas = Mem(Bits(memDataWidth bits),wayMemWordCount) if(preResetFlush){ tags.initBigInt(List.fill(wayLineCount)(BigInt(0))) diff --git a/src/test/cpp/regression/main.cpp b/src/test/cpp/regression/main.cpp index 82e0671..5fa9635 100644 --- a/src/test/cpp/regression/main.cpp +++ b/src/test/cpp/regression/main.cpp @@ -2026,10 +2026,15 @@ public: ws->fail(); } #endif - ws->iBusAccess(address,&top->iBus_rsp_payload_data,&error); + error = false; + for(int idx = 0;idx < IBUS_DATA_WIDTH/32;idx++){ + bool localError; + ws->iBusAccess(address+idx*4,((uint32_t*)&top->iBus_rsp_payload_data)+idx,&localError); + error |= localError; + } top->iBus_rsp_payload_error = error; - pendingCount--; - address = address + 4; + pendingCount-=IBUS_DATA_WIDTH/32; + address = address + IBUS_DATA_WIDTH/8; top->iBus_rsp_valid = 1; } if(ws->iStall) top->iBus_cmd_ready = VL_RANDOM_I(7) < 100 && pendingCount == 0; diff --git a/src/test/cpp/regression/makefile b/src/test/cpp/regression/makefile index 160707a..9836326 100644 --- a/src/test/cpp/regression/makefile +++ b/src/test/cpp/regression/makefile @@ -3,6 +3,7 @@ REGRESSION_PATH?=./ VEXRISCV_FILE?=../../../../VexRiscv.v IBUS?=CACHED IBUS_TC?=no +IBUS_DATA_WIDTH?=32 DBUS?=CACHED TRACE?=no TRACE_ACCESS?=no @@ -41,8 +42,11 @@ STOP_ON_ERROR?=no COREMARK=no WITH_USER_IO?=no + ADDCFLAGS += -CFLAGS -DREGRESSION_PATH='\"$(REGRESSION_PATH)/\"' ADDCFLAGS += -CFLAGS -DIBUS_${IBUS} +ADDCFLAGS += -CFLAGS -DIBUS_DATA_WIDTH=${IBUS_DATA_WIDTH} + ADDCFLAGS += -CFLAGS -DDBUS_${DBUS} ADDCFLAGS += -CFLAGS -DREDO=${REDO} ADDCFLAGS += -CFLAGS -pthread diff --git a/src/test/scala/vexriscv/TestIndividualFeatures.scala b/src/test/scala/vexriscv/TestIndividualFeatures.scala index f23299a..98da229 100644 --- a/src/test/scala/vexriscv/TestIndividualFeatures.scala +++ b/src/test/scala/vexriscv/TestIndividualFeatures.scala @@ -354,7 +354,8 @@ class IBusDimension(rvcRate : Double) extends VexRiscvDimension("IBus") { val prediction = random(r, List(NONE, STATIC, DYNAMIC, DYNAMIC_TARGET)) val relaxedPcCalculation, twoCycleCache, injectorStage = r.nextBoolean() val twoCycleRam = r.nextBoolean() && twoCycleCache - val bytePerLine = List(8,16,32,64)(r.nextInt(4)) + val memDataWidth = List(32,64,128)(r.nextInt(3)) + val bytePerLine = Math.max(memDataWidth/8, List(8,16,32,64)(r.nextInt(4))) var cacheSize = 0 var wayCount = 0 do{ @@ -362,8 +363,8 @@ class IBusDimension(rvcRate : Double) extends VexRiscvDimension("IBus") { wayCount = 1 << r.nextInt(3) }while(cacheSize/wayCount < 512 || (catchAll && cacheSize/wayCount > 4096)) - new VexRiscvPosition("Cached" + (if(twoCycleCache) "2cc" else "") + (if(injectorStage) "Injstage" else "") + (if(twoCycleRam) "2cr" else "") + "S" + cacheSize + "W" + wayCount + "BPL" + bytePerLine + (if(relaxedPcCalculation) "Relax" else "") + (if(compressed) "Rvc" else "") + prediction.getClass.getTypeName().replace("$","")+ (if(tighlyCoupled)"Tc" else "")) with InstructionAnticipatedPosition{ - override def testParam = "IBUS=CACHED" + (if(compressed) " COMPRESSED=yes" else "") + (if(tighlyCoupled)" IBUS_TC=yes" else "") + new VexRiscvPosition(s"Cached${memDataWidth}d" + (if(twoCycleCache) "2cc" else "") + (if(injectorStage) "Injstage" else "") + (if(twoCycleRam) "2cr" else "") + "S" + cacheSize + "W" + wayCount + "BPL" + bytePerLine + (if(relaxedPcCalculation) "Relax" else "") + (if(compressed) "Rvc" else "") + prediction.getClass.getTypeName().replace("$","")+ (if(tighlyCoupled)"Tc" else "")) with InstructionAnticipatedPosition{ + 
override def testParam = s"IBUS=CACHED IBUS_DATA_WIDTH=$memDataWidth" + (if(compressed) " COMPRESSED=yes" else "") + (if(tighlyCoupled)" IBUS_TC=yes" else "") override def applyOn(config: VexRiscvConfig): Unit = { val p = new IBusCachedPlugin( resetVector = 0x80000000l, @@ -378,7 +379,7 @@ class IBusDimension(rvcRate : Double) extends VexRiscvDimension("IBus") { wayCount = wayCount, addressWidth = 32, cpuDataWidth = 32, - memDataWidth = 32, + memDataWidth = memDataWidth, catchIllegalAccess = catchAll, catchAccessFault = catchAll, asyncTagMemory = false, From 7b80e1fc307727d3a9a5c8023d9ba12b123b52b1 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Tue, 28 Apr 2020 22:11:41 +0200 Subject: [PATCH 42/91] Set SMP workspace to use i$ memDataWidth of 128 bits --- src/main/scala/vexriscv/TestsWorkspace.scala | 2 +- .../demo/smp/VexRiscvSmpCluster.scala | 2 +- .../demo/smp/VexRiscvSmpLitexCluster.scala | 30 ++++++++++++++++++- 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/src/main/scala/vexriscv/TestsWorkspace.scala b/src/main/scala/vexriscv/TestsWorkspace.scala index 3cd633b..4d9575e 100644 --- a/src/main/scala/vexriscv/TestsWorkspace.scala +++ b/src/main/scala/vexriscv/TestsWorkspace.scala @@ -64,7 +64,7 @@ object TestsWorkspace { wayCount = 1, addressWidth = 32, cpuDataWidth = 32, - memDataWidth = 32, + memDataWidth = 128, catchIllegalAccess = true, catchAccessFault = true, asyncTagMemory = false, diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala index f4aa237..3a72603 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -149,7 +149,7 @@ object VexRiscvSmpClusterGen { wayCount = 1, addressWidth = 32, cpuDataWidth = 32, - memDataWidth = 32, + memDataWidth = 128, catchIllegalAccess = true, catchAccessFault = true, asyncTagMemory = false, diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala index 244c3a9..60add57 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala @@ -6,7 +6,9 @@ import spinal.lib.bus.wishbone.{Wishbone, WishboneConfig, WishboneSlaveFactory} import spinal.lib.com.jtag.Jtag import spinal.lib._ import spinal.lib.bus.misc.{AddressMapping, DefaultMapping, SizeMapping} +import spinal.lib.eda.bench.Bench import spinal.lib.misc.Clint +import vexriscv.demo.smp.VexRiscvLitexSmpClusterOpenSbi.{cpuCount, parameter} import vexriscv.demo.smp.VexRiscvSmpClusterGen.vexRiscvConfig import vexriscv.{VexRiscv, VexRiscvConfig} import vexriscv.plugin.{CsrPlugin, DBusCachedPlugin, DebugPlugin, IBusCachedPlugin} @@ -157,6 +159,32 @@ case class VexRiscvLitexSmpCluster(p : VexRiscvLitexSmpClusterParameter, val peripheralWishbone = peripheralArbiter.io.output.toWishbone() io.peripheral << peripheralWishbone } +object VexRiscvLitexSmpClusterGen extends App { + val cpuCount = 4 + val withStall = false + + def parameter = VexRiscvLitexSmpClusterParameter( + cluster = VexRiscvSmpClusterParameter( + cpuConfigs = List.tabulate(cpuCount) { hartId => + vexRiscvConfig( + hartId = hartId, + ioRange = address => address.msb, + resetVector = 0 + ) + } + ), + liteDram = LiteDramNativeParameter(addressWidth = 32, dataWidth = 128), + liteDramMapping = SizeMapping(0x40000000l, 0x40000000l) + ) + + def dutGen = VexRiscvLitexSmpCluster( + p = parameter, + 
debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")) + ) + + SpinalVerilog(Bench.compressIo(dutGen)) + +} object VexRiscvLitexSmpClusterOpenSbi extends App{ @@ -180,7 +208,7 @@ object VexRiscvLitexSmpClusterOpenSbi extends App{ ) } ), - liteDram = LiteDramNativeParameter(addressWidth = 32, dataWidth = 32), + liteDram = LiteDramNativeParameter(addressWidth = 32, dataWidth = 128), liteDramMapping = SizeMapping(0x40000000l, 0x40000000l) ) From 86e0cbc1f3683be610a0c1ce9796daa82c44541c Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Wed, 29 Apr 2020 13:59:43 +0200 Subject: [PATCH 43/91] I$ with memDataWidth > cpuDataWidth now mux memWords into cpuWords before the decode stage by default. Add twoCycleRamInnerMux option to move that to the decode stage --- src/main/scala/vexriscv/ip/InstructionCache.scala | 10 ++++++---- src/test/scala/vexriscv/TestIndividualFeatures.scala | 4 +++- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/main/scala/vexriscv/ip/InstructionCache.scala b/src/main/scala/vexriscv/ip/InstructionCache.scala index 469377c..7560114 100644 --- a/src/main/scala/vexriscv/ip/InstructionCache.scala +++ b/src/main/scala/vexriscv/ip/InstructionCache.scala @@ -22,6 +22,7 @@ case class InstructionCacheConfig( cacheSize : Int, asyncTagMemory : Boolean, twoCycleCache : Boolean = true, twoCycleRam : Boolean = false, + twoCycleRamInnerMux : Boolean = false, preResetFlush : Boolean = false, bypassGen : Boolean = false ){ @@ -404,7 +405,8 @@ class InstructionCache(p : InstructionCacheConfig) extends Component{ }else { way.tags.readSync(io.cpu.prefetch.pc(lineRange), !io.cpu.fetch.isStuck) } - val data = way.datas.readSync(io.cpu.prefetch.pc(lineRange.high downto memWordRange.low), !io.cpu.fetch.isStuck) + val dataMem = way.datas.readSync(io.cpu.prefetch.pc(lineRange.high downto memWordRange.low), !io.cpu.fetch.isStuck) + val data = if(!twoCycleRamInnerMux) dataMem.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(memWordToCpuWordRange)) else dataMem } } @@ -415,7 +417,7 @@ class InstructionCache(p : InstructionCacheConfig) extends Component{ val id = OHToUInt(hits) val error = read.waysValues.map(_.tag.error).read(id) val data = read.waysValues.map(_.data).read(id) - val word = if(cpuDataWidth == memDataWidth) CombInit(data) else data.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(memWordToCpuWordRange)) + val word = if(cpuDataWidth == memDataWidth || !twoCycleRamInnerMux) CombInit(data) else data.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(memWordToCpuWordRange)) io.cpu.fetch.data := (if(p.bypassGen) (io.cpu.fetch.dataBypassValid ? io.cpu.fetch.dataBypass | word) else word) if(twoCycleCache){ io.cpu.decode.data := RegNextWhen(io.cpu.fetch.data,!io.cpu.decode.isStuck) @@ -423,7 +425,7 @@ class InstructionCache(p : InstructionCacheConfig) extends Component{ } if(twoCycleRam && wayCount == 1){ - val cacheData = if(cpuDataWidth == memDataWidth) CombInit(read.waysValues.head.data) else read.waysValues.head.data.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(memWordToCpuWordRange)) + val cacheData = if(cpuDataWidth == memDataWidth || !twoCycleRamInnerMux) CombInit(read.waysValues.head.data) else read.waysValues.head.data.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(memWordToCpuWordRange)) io.cpu.fetch.data := (if(p.bypassGen) (io.cpu.fetch.dataBypassValid ? 
io.cpu.fetch.dataBypass | cacheData) else cacheData) } @@ -459,7 +461,7 @@ class InstructionCache(p : InstructionCacheConfig) extends Component{ val id = OHToUInt(hits) val error = tags(id).error val data = fetchStage.read.waysValues.map(way => stage(way.data)).read(id) - val word = if(cpuDataWidth == memDataWidth) data else data.subdivideIn(cpuDataWidth bits).read(io.cpu.decode.pc(memWordToCpuWordRange)) + val word = if(cpuDataWidth == memDataWidth || !twoCycleRamInnerMux) data else data.subdivideIn(cpuDataWidth bits).read(io.cpu.decode.pc(memWordToCpuWordRange)) if(p.bypassGen) when(stage(io.cpu.fetch.dataBypassValid)){ word := stage(io.cpu.fetch.dataBypass) } diff --git a/src/test/scala/vexriscv/TestIndividualFeatures.scala b/src/test/scala/vexriscv/TestIndividualFeatures.scala index 98da229..72651b4 100644 --- a/src/test/scala/vexriscv/TestIndividualFeatures.scala +++ b/src/test/scala/vexriscv/TestIndividualFeatures.scala @@ -354,6 +354,7 @@ class IBusDimension(rvcRate : Double) extends VexRiscvDimension("IBus") { val prediction = random(r, List(NONE, STATIC, DYNAMIC, DYNAMIC_TARGET)) val relaxedPcCalculation, twoCycleCache, injectorStage = r.nextBoolean() val twoCycleRam = r.nextBoolean() && twoCycleCache + val twoCycleRamInnerMux = r.nextBoolean() && twoCycleRam val memDataWidth = List(32,64,128)(r.nextInt(3)) val bytePerLine = Math.max(memDataWidth/8, List(8,16,32,64)(r.nextInt(4))) var cacheSize = 0 @@ -384,7 +385,8 @@ class IBusDimension(rvcRate : Double) extends VexRiscvDimension("IBus") { catchAccessFault = catchAll, asyncTagMemory = false, twoCycleRam = twoCycleRam, - twoCycleCache = twoCycleCache + twoCycleCache = twoCycleCache, + twoCycleRamInnerMux = twoCycleRamInnerMux ) ) if(tighlyCoupled) p.newTightlyCoupledPort(TightlyCoupledPortParameter("iBusTc", a => a(30 downto 28) === 0x0)) From 9e9d28bfa697038adbe9a3babf579908fb402446 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Wed, 29 Apr 2020 14:02:41 +0200 Subject: [PATCH 44/91] d$ now implement consistancy hazard by using writeback redo --- src/main/scala/vexriscv/ip/DataCache.scala | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/main/scala/vexriscv/ip/DataCache.scala b/src/main/scala/vexriscv/ip/DataCache.scala index de29f49..f99acff 100644 --- a/src/main/scala/vexriscv/ip/DataCache.scala +++ b/src/main/scala/vexriscv/ip/DataCache.scala @@ -564,12 +564,13 @@ class DataCache(val p : DataCacheConfig) extends Component{ val isAmo = if(withAmo) io.cpu.execute.isAmo else False //Ensure write to read consistency + val consistancyIssue = False val consistancyCheck = (withInvalidate || withWriteResponse) generate new Area { val fenceConsistent = (if(withInvalidate) sync.fenceConsistent else pending.done) && !io.cpu.writeBack.fenceValid && (if(mergeExecuteMemory) True else !io.cpu.memory.fenceValid) //Pessimistic fence tracking val totalyConsistent = (if(withInvalidate) sync.totalyConsistent else pending.done) && (if(mergeExecuteMemory) True else !(io.cpu.memory.isValid && io.cpu.memory.isWrite)) && !(io.cpu.writeBack.isValid && io.cpu.memory.isWrite) when(io.cpu.execute.isValid /*&& (!io.cpu.execute.args.wr || isAmo)*/){ when(!fenceConsistent || io.cpu.execute.totalyConsistent && !totalyConsistent){ - io.cpu.execute.haltIt := True + consistancyIssue := True } } } @@ -588,6 +589,7 @@ class DataCache(val p : DataCacheConfig) extends Component{ val wayHits = earlyWaysHits generate ways.map(way => (io.cpu.memory.mmuBus.rsp.physicalAddress(tagRange) === way.tagsReadRsp.address && 
way.tagsReadRsp.valid)) val dataMux = earlyDataMux generate MuxOH(wayHits, ways.map(_.dataReadRsp)) val wayInvalidate = stagePipe(stage0. wayInvalidate) + val consistancyIssue = stagePipe(stage0.consistancyIssue) val dataColisions = if(mergeExecuteMemory){ stagePipe(stage0.dataColisions) } else { @@ -605,6 +607,7 @@ class DataCache(val p : DataCacheConfig) extends Component{ val tagsReadRsp = ways.map(w => ramPipe(w.tagsReadRsp)) val dataReadRsp = !earlyDataMux generate ways.map(w => ramPipe(w.dataReadRsp)) val wayInvalidate = stagePipe(stageA. wayInvalidate) + val consistancyIssue = stagePipe(stageA.consistancyIssue) val dataColisions = stagePipe(stageA.dataColisions) val waysHitsBeforeInvalidate = if(earlyWaysHits) stagePipe(B(stageA.wayHits)) else B(tagsReadRsp.map(tag => mmuRsp.physicalAddress(tagRange) === tag.address && tag.valid).asBits()) val waysHits = waysHitsBeforeInvalidate & ~wayInvalidate @@ -651,8 +654,7 @@ class DataCache(val p : DataCacheConfig) extends Component{ val lrSc = withInternalLrSc generate new Area{ val reserved = RegInit(False) - when(io.cpu.writeBack.isValid && !io.cpu.writeBack.isStuck && request.isLrsc - && !io.cpu.redo && !io.cpu.writeBack.mmuException && !io.cpu.writeBack.unalignedAccess && !io.cpu.writeBack.accessError){ + when(io.cpu.writeBack.isValid && !io.cpu.writeBack.isStuck && request.isLrsc){ reserved := !request.wr } } @@ -828,15 +830,16 @@ class DataCache(val p : DataCacheConfig) extends Component{ } //remove side effects on exceptions - when(mmuRsp.refilling || io.cpu.writeBack.accessError || io.cpu.writeBack.mmuException || io.cpu.writeBack.unalignedAccess){ + when(consistancyIssue || mmuRsp.refilling || io.cpu.writeBack.accessError || io.cpu.writeBack.mmuException || io.cpu.writeBack.unalignedAccess){ io.mem.cmd.valid := False tagsWriteCmd.valid := False dataWriteCmd.valid := False loaderValid := False io.cpu.writeBack.haltIt := False + if(withInternalLrSc) lrSc.reserved := lrSc.reserved if(withExternalAmo) amo.external.state := LR_CMD } - io.cpu.redo setWhen(io.cpu.writeBack.isValid && mmuRsp.refilling) + io.cpu.redo setWhen(io.cpu.writeBack.isValid && (mmuRsp.refilling || consistancyIssue)) assert(!(io.cpu.writeBack.isValid && !io.cpu.writeBack.haltIt && io.cpu.writeBack.isStuck), "writeBack stuck by another plugin is not allowed") } From 7c50fa6d5541a0e2118e67a13250d1b826c81f9f Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Wed, 29 Apr 2020 14:03:00 +0200 Subject: [PATCH 45/91] SmpCluster now use i$ line of 64 bytes --- src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala | 2 +- src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala index 3a72603..98ab788 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -145,7 +145,7 @@ object VexRiscvSmpClusterGen { relaxedPcCalculation = true, config = InstructionCacheConfig( cacheSize = 4096*1, - bytePerLine = 32, + bytePerLine = 64, wayCount = 1, addressWidth = 32, cpuDataWidth = 32, diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala index 60add57..e911e44 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala @@ -148,8 +148,9 @@ case class 
VexRiscvLitexSmpCluster(p : VexRiscvLitexSmpClusterParameter, iBusDecoder.io.input << iBusArbiter.io.output io.iMem.fromBmb(iBusDecoder.io.outputs(1)) + val peripheralAccessLength = Math.max(iBusDecoder.io.outputs(0).p.lengthWidth, dBusDecoder.io.outputs(0).p.lengthWidth) val peripheralArbiter = BmbArbiter( - p = dBusDecoder.io.outputs(0).p.copy(sourceWidth = dBusDecoder.io.outputs(0).p.sourceWidth + 1), + p = dBusDecoder.io.outputs(0).p.copy(sourceWidth = dBusDecoder.io.outputs(0).p.sourceWidth + 1, lengthWidth = peripheralAccessLength), portCount = 2, lowerFirstPriority = true ) From dc0da9662a4187439afcba079f143e1e624f310e Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Fri, 1 May 2020 11:14:11 +0200 Subject: [PATCH 46/91] Update SMP fence (final) --- src/main/scala/vexriscv/TestsWorkspace.scala | 3 +- src/main/scala/vexriscv/ip/DataCache.scala | 122 ++++++++++++------ .../vexriscv/plugin/DBusCachedPlugin.scala | 61 ++++----- 3 files changed, 119 insertions(+), 67 deletions(-) diff --git a/src/main/scala/vexriscv/TestsWorkspace.scala b/src/main/scala/vexriscv/TestsWorkspace.scala index 4d9575e..04ceda3 100644 --- a/src/main/scala/vexriscv/TestsWorkspace.scala +++ b/src/main/scala/vexriscv/TestsWorkspace.scala @@ -103,7 +103,8 @@ object TestsWorkspace { withLrSc = true, withAmo = true, withExclusive = true, - withInvalidate = true + withInvalidate = true, + pendingMax = 32 // ) ), memoryTranslatorPortConfig = MmuPortConfig( diff --git a/src/main/scala/vexriscv/ip/DataCache.scala b/src/main/scala/vexriscv/ip/DataCache.scala index f99acff..2e33999 100644 --- a/src/main/scala/vexriscv/ip/DataCache.scala +++ b/src/main/scala/vexriscv/ip/DataCache.scala @@ -27,10 +27,11 @@ case class DataCacheConfig(cacheSize : Int, withAmo : Boolean = false, withExclusive : Boolean = false, withInvalidate : Boolean = false, - pendingMax : Int = 64, + pendingMax : Int = 32, mergeExecuteMemory : Boolean = false){ assert(!(mergeExecuteMemory && (earlyDataMux || earlyWaysHits))) assert(!(earlyDataMux && !earlyWaysHits)) + assert(isPow2(pendingMax)) def withWriteResponse = withExclusive def burstSize = bytePerLine*8/memDataWidth val burstLength = bytePerLine/(memDataWidth/8) @@ -102,10 +103,9 @@ case class DataCacheCpuExecute(p : DataCacheConfig) extends Bundle with IMasterS val address = UInt(p.addressWidth bit) val haltIt = Bool val args = DataCacheCpuExecuteArgs(p) - val totalyConsistent = Bool() override def asMaster(): Unit = { - out(isValid, args, address, totalyConsistent) + out(isValid, args, address) in(haltIt) } } @@ -120,6 +120,8 @@ case class DataCacheCpuExecuteArgs(p : DataCacheConfig) extends Bundle{ val swap = Bool() val alu = Bits(3 bits) } + + val totalyConsistent = Bool() //Only for AMO/LRSC } case class DataCacheCpuMemory(p : DataCacheConfig) extends Bundle with IMasterSlave{ @@ -129,16 +131,31 @@ case class DataCacheCpuMemory(p : DataCacheConfig) extends Bundle with IMasterSl val isWrite = Bool val address = UInt(p.addressWidth bit) val mmuBus = MemoryTranslatorBus() - val fenceValid = Bool() override def asMaster(): Unit = { - out(isValid, isStuck, isRemoved, address, fenceValid) + out(isValid, isStuck, isRemoved, address) in(isWrite) slave(mmuBus) } } +case class FenceFlags() extends Bundle { + val SW,SR,SO,SI,PW,PR,PO,PI = Bool() + val FM = Bits(4 bits) + + def SL = SR || SI + def SS = SW || SO + def PL = PR || PI + def PS = PW || PO + def forceAll(): Unit ={ + List(SW,SR,SO,SI,PW,PR,PO,PI).foreach(_ := True) + } + def clearAll(): Unit ={ + List(SW,SR,SO,SI,PW,PR,PO,PI).foreach(_ := False) + 
} +} + case class DataCacheCpuWriteBack(p : DataCacheConfig) extends Bundle with IMasterSlave{ val isValid = Bool() val isStuck = Bool() @@ -149,13 +166,10 @@ case class DataCacheCpuWriteBack(p : DataCacheConfig) extends Bundle with IMaste val address = UInt(p.addressWidth bit) val mmuException, unalignedAccess, accessError = Bool() val keepMemRspData = Bool() //Used by external AMO to avoid having an internal buffer - val fenceValid = Bool() - val fenceFire = Bool() - - // val exceptionBus = if(p.catchSomething) Flow(ExceptionCause()) else null + val fence = FenceFlags() override def asMaster(): Unit = { - out(isValid,isStuck,isUser, address, fenceValid, fenceFire) + out(isValid,isStuck,isUser, address, fence) in(haltIt, data, mmuException, unalignedAccess, accessError, isWrite, keepMemRspData) } } @@ -180,6 +194,7 @@ case class DataCacheCpuBus(p : DataCacheConfig) extends Bundle with IMasterSlave case class DataCacheMemCmd(p : DataCacheConfig) extends Bundle{ val wr = Bool + val uncached = Bool val address = UInt(p.addressWidth bit) val data = Bits(p.memDataWidth bits) val mask = Bits(p.memDataWidth/8 bits) @@ -532,21 +547,48 @@ class DataCache(val p : DataCacheConfig) extends Component{ val sync = withInvalidate generate new Area{ io.mem.sync.ready := True - val pendingSync = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0) - val pendingSyncNext = pendingSync + U(io.mem.cmd.fire && io.mem.cmd.wr) - U(io.mem.sync.fire) - pendingSync := pendingSyncNext + val syncContext = new Area{ + val history = Mem(Bool, pendingMax) + val wPtr, rPtr = Reg(UInt(log2Up(pendingMax)+1 bits)) init(0) + when(io.mem.cmd.fire && io.mem.cmd.wr){ + history.write(wPtr.resized, io.mem.cmd.uncached) + wPtr := wPtr + 1 + } - val full = RegNext(pendingSync.msb) - io.cpu.execute.haltIt setWhen(full) + when(io.mem.sync.fire){ + rPtr := rPtr + 1 + } + val uncached = history.readAsync(rPtr.resized) + val full = RegNext(wPtr - rPtr >= pendingMax-1) + io.cpu.execute.haltIt setWhen(full) + } + def pending(inc : Bool, dec : Bool) = new Area { + val pendingSync = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0) + val pendingSyncNext = pendingSync + U(io.mem.cmd.fire && io.mem.cmd.wr && inc) - U(io.mem.sync.fire && dec) + pendingSync := pendingSyncNext + } - val incoerentSync = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0) - incoerentSync := incoerentSync - U(io.mem.sync.fire && incoerentSync =/= 0) - when(io.cpu.writeBack.fenceValid){ incoerentSync := pendingSyncNext } + val writeCached = pending(inc = !io.mem.cmd.uncached, dec = !syncContext.uncached) + val writeUncached = pending(inc = io.mem.cmd.uncached, dec = syncContext.uncached) + def track(load : Bool, uncached : Boolean) = new Area { + val counter = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0) + counter := counter - U(io.mem.sync.fire && counter =/= 0 && (if(uncached) syncContext.uncached else !syncContext.uncached)) + when(load){ counter := (if(uncached) writeUncached.pendingSyncNext else writeCached.pendingSyncNext) } - val totalyConsistent = pendingSync === 0 - val fenceConsistent = incoerentSync === 0 + val busy = counter =/= 0 + } + + val w2w = track(load = io.cpu.writeBack.fence.PW && io.cpu.writeBack.fence.SW, uncached = false) + val w2r = track(load = io.cpu.writeBack.fence.PW && io.cpu.writeBack.fence.SR, uncached = false) + val w2i = track(load = io.cpu.writeBack.fence.PW && io.cpu.writeBack.fence.SI, uncached = false) + val w2o = track(load = io.cpu.writeBack.fence.PW && io.cpu.writeBack.fence.SO, uncached = false) + val o2w = track(load = 
io.cpu.writeBack.fence.PO && io.cpu.writeBack.fence.SW, uncached = true) + val o2r = track(load = io.cpu.writeBack.fence.PO && io.cpu.writeBack.fence.SR, uncached = true) + //Assume o2i and o2o are ordered by the interconnect + + val notTotalyConsistent = w2w.busy || w2r.busy || w2i.busy || w2o.busy || o2w.busy || o2r.busy } @@ -562,18 +604,6 @@ class DataCache(val p : DataCacheConfig) extends Component{ val wayInvalidate = B(0, wayCount bits) //Used if invalidate enabled val isAmo = if(withAmo) io.cpu.execute.isAmo else False - - //Ensure write to read consistency - val consistancyIssue = False - val consistancyCheck = (withInvalidate || withWriteResponse) generate new Area { - val fenceConsistent = (if(withInvalidate) sync.fenceConsistent else pending.done) && !io.cpu.writeBack.fenceValid && (if(mergeExecuteMemory) True else !io.cpu.memory.fenceValid) //Pessimistic fence tracking - val totalyConsistent = (if(withInvalidate) sync.totalyConsistent else pending.done) && (if(mergeExecuteMemory) True else !(io.cpu.memory.isValid && io.cpu.memory.isWrite)) && !(io.cpu.writeBack.isValid && io.cpu.memory.isWrite) - when(io.cpu.execute.isValid /*&& (!io.cpu.execute.args.wr || isAmo)*/){ - when(!fenceConsistent || io.cpu.execute.totalyConsistent && !totalyConsistent){ - consistancyIssue := True - } - } - } } val stageA = new Area{ @@ -586,10 +616,29 @@ class DataCache(val p : DataCacheConfig) extends Component{ io.cpu.memory.mmuBus.end := !io.cpu.memory.isStuck || io.cpu.memory.isRemoved io.cpu.memory.isWrite := request.wr + val isAmo = if(withAmo) request.isAmo else False + val isLrsc = if(withAmo) request.isLrsc else False + val consistancyCheck = (withInvalidate || withWriteResponse) generate new Area { + val hazard = False + val w = sync.w2w.busy || sync.o2w.busy + val r = stagePipe(sync.w2r.busy || sync.o2r.busy) || sync.w2r.busy || sync.o2r.busy // As it use the cache, need to check against the execute stage status too + val o = CombInit(sync.w2o.busy) + val i = CombInit(sync.w2i.busy) + + val s = io.cpu.memory.mmuBus.rsp.isIoAccess ? o | w + val l = io.cpu.memory.mmuBus.rsp.isIoAccess ? i | r + + when(isAmo? (s || l) | (request.wr ? s | l)){ + hazard := True + } + when(request.totalyConsistent && (sync.notTotalyConsistent || io.cpu.writeBack.isValid && io.cpu.writeBack.isWrite)){ + hazard := True + } + } + val wayHits = earlyWaysHits generate ways.map(way => (io.cpu.memory.mmuBus.rsp.physicalAddress(tagRange) === way.tagsReadRsp.address && way.tagsReadRsp.valid)) val dataMux = earlyDataMux generate MuxOH(wayHits, ways.map(_.dataReadRsp)) val wayInvalidate = stagePipe(stage0. wayInvalidate) - val consistancyIssue = stagePipe(stage0.consistancyIssue) val dataColisions = if(mergeExecuteMemory){ stagePipe(stage0.dataColisions) } else { @@ -607,7 +656,7 @@ class DataCache(val p : DataCacheConfig) extends Component{ val tagsReadRsp = ways.map(w => ramPipe(w.tagsReadRsp)) val dataReadRsp = !earlyDataMux generate ways.map(w => ramPipe(w.dataReadRsp)) val wayInvalidate = stagePipe(stageA. 
wayInvalidate) - val consistancyIssue = stagePipe(stageA.consistancyIssue) + val consistancyHazard = if(stageA.consistancyCheck != null) stagePipe(stageA.consistancyCheck.hazard) else False val dataColisions = stagePipe(stageA.dataColisions) val waysHitsBeforeInvalidate = if(earlyWaysHits) stagePipe(B(stageA.wayHits)) else B(tagsReadRsp.map(tag => mmuRsp.physicalAddress(tagRange) === tag.address && tag.valid).asBits()) val waysHits = waysHitsBeforeInvalidate & ~wayInvalidate @@ -718,6 +767,7 @@ class DataCache(val p : DataCacheConfig) extends Component{ io.mem.cmd.wr := request.wr io.mem.cmd.mask := mask io.mem.cmd.data := requestDataBypass + io.mem.cmd.uncached := mmuRsp.isIoAccess if(withExternalLrSc) io.mem.cmd.exclusive := request.isLrsc || isAmo @@ -830,7 +880,7 @@ class DataCache(val p : DataCacheConfig) extends Component{ } //remove side effects on exceptions - when(consistancyIssue || mmuRsp.refilling || io.cpu.writeBack.accessError || io.cpu.writeBack.mmuException || io.cpu.writeBack.unalignedAccess){ + when(consistancyHazard || mmuRsp.refilling || io.cpu.writeBack.accessError || io.cpu.writeBack.mmuException || io.cpu.writeBack.unalignedAccess){ io.mem.cmd.valid := False tagsWriteCmd.valid := False dataWriteCmd.valid := False @@ -839,7 +889,7 @@ class DataCache(val p : DataCacheConfig) extends Component{ if(withInternalLrSc) lrSc.reserved := lrSc.reserved if(withExternalAmo) amo.external.state := LR_CMD } - io.cpu.redo setWhen(io.cpu.writeBack.isValid && (mmuRsp.refilling || consistancyIssue)) + io.cpu.redo setWhen(io.cpu.writeBack.isValid && (mmuRsp.refilling || consistancyHazard)) assert(!(io.cpu.writeBack.isValid && !io.cpu.writeBack.haltIt && io.cpu.writeBack.isStuck), "writeBack stuck by another plugin is not allowed") } diff --git a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala index 12f38f3..1d5adbb 100644 --- a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala +++ b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala @@ -50,8 +50,7 @@ class DBusCachedPlugin(val config : DataCacheConfig, object MEMORY_LRSC extends Stageable(Bool) object MEMORY_AMO extends Stageable(Bool) object MEMORY_FENCE extends Stageable(Bool) - object MEMORY_FENCE_FRONT extends Stageable(Bool) - object MEMORY_FENCE_BACK extends Stageable(Bool) + object MEMORY_FORCE_CONSTISTENCY extends Stageable(Bool) object IS_DBUS_SHARING extends Stageable(Bool()) object MEMORY_VIRTUAL_ADDRESS extends Stageable(UInt(32 bits)) @@ -215,31 +214,13 @@ class DBusCachedPlugin(val config : DataCacheConfig, arbitration.haltItself := True } - case class FenceFlags() extends Bundle { - val SW,SR,SO,SI,PW,PR,PO,PI = Bool() - val FM = Bits(4 bits) - - def SL = SR || SI - def SS = SW || SO - def PL = PR || PI - def PS = PW || PO - } //Manage write to read hit ordering (ensure invalidation timings) - val fence = new Area{ - insert(MEMORY_FENCE_FRONT) := False - insert(MEMORY_FENCE_BACK) := False - val ff = input(INSTRUCTION)(31 downto 20).as(FenceFlags()) - if(withWriteResponse){ - insert(MEMORY_FENCE_BACK) setWhen(input(MEMORY_FENCE) && (ff.PS && ff.SL)) - when(input(INSTRUCTION)(26)) { //AQ - if(withLrSc) insert(MEMORY_FENCE_BACK) setWhen(input(MEMORY_LRSC)) - if(withAmo) insert(MEMORY_FENCE_BACK) setWhen(input(MEMORY_AMO)) - } - when(input(INSTRUCTION)(25)) { //RL - if(withLrSc) insert(MEMORY_FENCE_FRONT) setWhen(input(MEMORY_LRSC)) - if(withAmo) insert(MEMORY_FENCE_FRONT) setWhen(input(MEMORY_AMO)) - } + val fence = new Area { + 
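// Release ordering: INSTRUCTION(25) is the RL bit of the LR/SC and AMO encodings. When it is set,
// MEMORY_FORCE_CONSTISTENCY is driven into cache.io.cpu.execute.args.totalyConsistent, and the
// cache's consistancyCheck replays the access (io.cpu.redo) until no earlier write is still
// waiting for its io.mem.sync acknowledgement.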
insert(MEMORY_FORCE_CONSTISTENCY) := False + when(input(INSTRUCTION)(25)) { //RL + if (withLrSc) insert(MEMORY_FORCE_CONSTISTENCY) setWhen (input(MEMORY_LRSC)) + if (withAmo) insert(MEMORY_FORCE_CONSTISTENCY) setWhen (input(MEMORY_AMO)) } } } @@ -260,7 +241,7 @@ class DBusCachedPlugin(val config : DataCacheConfig, cache.io.cpu.flush.valid := arbitration.isValid && input(MEMORY_MANAGMENT) - cache.io.cpu.execute.totalyConsistent := arbitration.isValid && input(MEMORY_FENCE_FRONT) + cache.io.cpu.execute.args.totalyConsistent := input(MEMORY_FORCE_CONSTISTENCY) arbitration.haltItself setWhen(cache.io.cpu.flush.isStall || cache.io.cpu.execute.haltIt) if(withLrSc) { @@ -302,8 +283,6 @@ class DBusCachedPlugin(val config : DataCacheConfig, cache.io.cpu.memory.mmuBus <> mmuBus cache.io.cpu.memory.mmuBus.rsp.isIoAccess setWhen(pipeline(DEBUG_BYPASS_CACHE) && !cache.io.cpu.memory.isWrite) - - cache.io.cpu.memory.fenceValid := arbitration.isValid && input(MEMORY_FENCE_BACK) } val managementStage = stages.last @@ -314,8 +293,30 @@ class DBusCachedPlugin(val config : DataCacheConfig, cache.io.cpu.writeBack.isUser := (if(privilegeService != null) privilegeService.isUser() else False) cache.io.cpu.writeBack.address := U(input(REGFILE_WRITE_DATA)) - cache.io.cpu.writeBack.fenceValid := arbitration.isValid && input(MEMORY_FENCE_BACK) - cache.io.cpu.writeBack.fenceFire := arbitration.isFiring && input(MEMORY_FENCE_BACK) + val fence = if(withInvalidate) { + cache.io.cpu.writeBack.fence := input(INSTRUCTION)(31 downto 20).as(FenceFlags()) + val aquire = False + if(withWriteResponse) when(input(INSTRUCTION)(26)) { //AQ + if(withLrSc) when(input(MEMORY_LRSC)){ + aquire := True + } + if(withAmo) when(input(MEMORY_AMO)){ + aquire := True + } + } + + when(aquire){ + cache.io.cpu.writeBack.fence.forceAll() + } + + when(!input(MEMORY_FENCE) || !arbitration.isFiring){ + cache.io.cpu.writeBack.fence.clearAll() + } + + when(arbitration.isValid && (input(MEMORY_FENCE) || aquire)){ + memory.arbitration.haltByOther := True //Ensure that the fence affect the memory stage instruction by stoping it + } + } redoBranch.valid := False redoBranch.payload := input(PC) From f5f30615ba2f37bb8cf0f6f3653783c92446fead Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Fri, 1 May 2020 11:14:52 +0200 Subject: [PATCH 47/91] Got litex SMP cluster to work on FPGA --- .../demo/smp/VexRiscvSmpCluster.scala | 17 +- .../demo/smp/VexRiscvSmpLitexCluster.scala | 353 ++++++++++++++---- 2 files changed, 292 insertions(+), 78 deletions(-) diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala index 98ab788..d5c0bcf 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -249,11 +249,11 @@ object VexRiscvSmpClusterGen { if(hartId == 0) config.plugins += new DebugPlugin(null) config } - def vexRiscvCluster(cpuCount : Int) = VexRiscvSmpCluster( + def vexRiscvCluster(cpuCount : Int, resetVector : Long = 0x80000000l) = VexRiscvSmpCluster( debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")), p = VexRiscvSmpClusterParameter( cpuConfigs = List.tabulate(cpuCount) { - vexRiscvConfig(_) + vexRiscvConfig(_, resetVector = resetVector) } ) ) @@ -440,7 +440,10 @@ object VexRiscvSmpClusterTestInfrastructure{ import spinal.core.sim._ dut.clockDomain.forkStimulus(10) dut.debugClockDomain.forkStimulus(10) - JtagTcp(dut.io.jtag, 100) +// JtagTcp(dut.io.jtag, 100) + dut.io.jtag.tck #= 
false + dut.io.jtag.tdi #= false + dut.io.jtag.tms #= false } } @@ -491,11 +494,17 @@ object VexRiscvSmpClusterOpenSbi extends App{ val cpuCount = 4 val withStall = false - simConfig.workspaceName("rawr_4c").compile(VexRiscvSmpClusterGen.vexRiscvCluster(cpuCount)).doSimUntilVoid(seed = 42){dut => + simConfig.workspaceName("rawr_4c").compile(VexRiscvSmpClusterGen.vexRiscvCluster(cpuCount, resetVector = 0x80000000l)).doSimUntilVoid(seed = 42){dut => // dut.clockDomain.forkSimSpeedPrinter(1.0) VexRiscvSmpClusterTestInfrastructure.init(dut) val ram = VexRiscvSmpClusterTestInfrastructure.ram(dut, withStall) // ram.memory.loadBin(0x80000000l, "../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_payload.bin") + +// ram.memory.loadBin(0x40F00000l, "/media/data/open/litex_smp/litex_vexriscv_smp/images/fw_jump.bin") +// ram.memory.loadBin(0x40000000l, "/media/data/open/litex_smp/litex_vexriscv_smp/images/Image") +// ram.memory.loadBin(0x40EF0000l, "/media/data/open/litex_smp/litex_vexriscv_smp/images/dtb") +// ram.memory.loadBin(0x41000000l, "/media/data/open/litex_smp/litex_vexriscv_smp/images/rootfs.cpio") + ram.memory.loadBin(0x80000000l, "../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_jump.bin") ram.memory.loadBin(0xC0000000l, "../buildroot/output/images/Image") ram.memory.loadBin(0xC1000000l, "../buildroot/output/images/dtb") diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala index e911e44..170b584 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala @@ -5,14 +5,19 @@ import spinal.lib.bus.bmb._ import spinal.lib.bus.wishbone.{Wishbone, WishboneConfig, WishboneSlaveFactory} import spinal.lib.com.jtag.Jtag import spinal.lib._ +import spinal.lib.bus.bmb.sim.{BmbMemoryMultiPort, BmbMemoryTester} import spinal.lib.bus.misc.{AddressMapping, DefaultMapping, SizeMapping} import spinal.lib.eda.bench.Bench import spinal.lib.misc.Clint +import spinal.lib.sim.{SimData, SparseMemory, StreamDriver, StreamMonitor, StreamReadyRandomizer} import vexriscv.demo.smp.VexRiscvLitexSmpClusterOpenSbi.{cpuCount, parameter} import vexriscv.demo.smp.VexRiscvSmpClusterGen.vexRiscvConfig import vexriscv.{VexRiscv, VexRiscvConfig} import vexriscv.plugin.{CsrPlugin, DBusCachedPlugin, DebugPlugin, IBusCachedPlugin} +import scala.collection.mutable +import scala.util.Random + case class LiteDramNativeParameter(addressWidth : Int, dataWidth : Int) case class LiteDramNativeCmd(p : LiteDramNativeParameter) extends Bundle{ @@ -39,46 +44,195 @@ case class LiteDramNative(p : LiteDramNativeParameter) extends Bundle with IMast slave(rdata) } - def fromBmb(bmb : Bmb): Unit = new Area{ - val resized = bmb.resize(p.dataWidth) - val unburstified = resized.unburstify() - case class Context() extends Bundle { - val context = Bits(unburstified.p.contextWidth bits) - val source = UInt(unburstified.p.sourceWidth bits) - val isWrite = Bool() - } - val (queueFork, cmdFork, dataFork) = StreamFork3(unburstified.cmd) - cmd.arbitrationFrom(cmdFork) - cmd.addr := (cmdFork.address >> log2Up(bmb.p.byteCount)).resized - cmd.we := cmdFork.isWrite + def fromBmb(bmb : Bmb, wdataFifoSize : Int, rdataFifoSize : Int) = { + val bridge = BmbToLiteDram( + bmbParameter = bmb.p, + liteDramParameter = this.p, + wdataFifoSize = wdataFifoSize, + rdataFifoSize = rdataFifoSize + ) + bridge.io.input << bmb + bridge.io.output <> this + bridge + } - if(bmb.p.canWrite) 
{ - wdata.arbitrationFrom(dataFork.throwWhen(dataFork.isRead)) - wdata.data := cmdFork.data - wdata.we := cmdFork.mask - } else { - dataFork.ready := True - wdata.valid := False - wdata.data.assignDontCare() - wdata.we.assignDontCare() + def simSlave(ram : SparseMemory,cd : ClockDomain, bmb : Bmb = null): Unit ={ + import spinal.core.sim._ + def bus = this + case class Cmd(address : Long, we : Boolean) + case class WData(data : BigInt, we : Long) + val cmdQueue = mutable.Queue[Cmd]() + val wdataQueue = mutable.Queue[WData]() + val rdataQueue = mutable.Queue[BigInt]() + + + case class Ref(address : Long, data : BigInt, we : Long, time : Long) + val ref = mutable.Queue[Ref]() + if(bmb != null) StreamMonitor(bmb.cmd, cd){p => + if(bmb.cmd.opcode.toInt == 1) ref.enqueue(Ref(p.fragment.address.toLong, p.fragment.data.toBigInt, p.fragment.mask.toLong, simTime())) } - val cmdContext = Stream(Context()) - cmdContext.arbitrationFrom(queueFork) - cmdContext.context := unburstified.cmd.context - cmdContext.source := unburstified.cmd.source - cmdContext.isWrite := unburstified.cmd.isWrite + var writeCmdCounter, writeDataCounter = 0 + StreamReadyRandomizer(bus.cmd, cd) + StreamMonitor(bus.cmd, cd) { t => + cmdQueue.enqueue(Cmd(t.addr.toLong * (p.dataWidth/8) , t.we.toBoolean)) + if(t.we.toBoolean) writeCmdCounter += 1 + } - val rspContext = cmdContext.queue(64) + StreamReadyRandomizer(bus.wdata, cd) + StreamMonitor(bus.wdata, cd) { p => + writeDataCounter += 1 +// if(p.data.toBigInt == BigInt("00000002000000020000000200000002",16)){ +// println("ASD") +// } + wdataQueue.enqueue(WData(p.data.toBigInt, p.we.toLong)) + } - rdata.ready := unburstified.rsp.fire && !rspContext.isWrite - rspContext.ready := unburstified.rsp.fire - unburstified.rsp.valid := rspContext.valid && (rspContext.isWrite || rdata.valid) - unburstified.rsp.setSuccess() - unburstified.rsp.last := True - unburstified.rsp.source := rspContext.source - unburstified.rsp.context := rspContext.context - unburstified.rsp.data := rdata.data +// new SimStreamAssert(cmd,cd) +// new SimStreamAssert(wdata,cd) +// new SimStreamAssert(rdata,cd) + + cd.onSamplings{ + if(writeDataCounter-writeCmdCounter > 2){ + println("miaou") + } + if(cmdQueue.nonEmpty && Random.nextFloat() < 0.5){ + val cmd = cmdQueue.head + if(cmd.we){ + if(wdataQueue.nonEmpty){ +// if(cmd.address == 0xc02ae850l) { +// println(s"! 
$writeCmdCounter $writeDataCounter") +// } + cmdQueue.dequeue() + val wdata = wdataQueue.dequeue() + val raw = wdata.data.toByteArray + val left = wdata.data.toByteArray.size-1 + if(bmb != null){ + assert(ref.nonEmpty) + assert((ref.head.address & 0xFFFFFFF0l) == cmd.address) + assert(ref.head.data == wdata.data) + assert(ref.head.we == wdata.we) + ref.dequeue() + } +// if(cmd.address == 0xc02ae850l) { +// println(s"$cmd $wdata ${simTime()}") +// } + for(i <- 0 until p.dataWidth/8){ + + + if(((wdata.we >> i) & 1) != 0) { +// if(cmd.address == 0xc02ae850l) { +// println(s"W $i ${ if (left - i >= 0) raw(left - i) else 0}") +// } + ram.write(cmd.address + i, if (left - i >= 0) raw(left - i) else 0) + } + } + } + } else { + cmdQueue.dequeue() + val value = new Array[Byte](p.dataWidth/8+1) + val left = value.size-1 + for(i <- 0 until p.dataWidth/8) { + value(left-i) = ram.read(cmd.address+i) + } + rdataQueue.enqueue(BigInt(value)) + } + } + } + + StreamDriver(bus.rdata, cd){ p => + if(rdataQueue.isEmpty){ + false + } else { + p.data #= rdataQueue.dequeue() + true + } + } + } +} + + + +case class BmbToLiteDram(bmbParameter : BmbParameter, + liteDramParameter : LiteDramNativeParameter, + wdataFifoSize : Int, + rdataFifoSize : Int) extends Component{ + val io = new Bundle { + val input = slave(Bmb(bmbParameter)) + val output = master(LiteDramNative(liteDramParameter)) + } + + val resized = io.input.resize(liteDramParameter.dataWidth) + val unburstified = resized.unburstify() + case class Context() extends Bundle { + val context = Bits(unburstified.p.contextWidth bits) + val source = UInt(unburstified.p.sourceWidth bits) + val isWrite = Bool() + } + + assert(isPow2(rdataFifoSize)) + val pendingRead = Reg(UInt(log2Up(rdataFifoSize) + 1 bits)) init(0) + + val halt = Bool() + val (cmdFork, dataFork) = StreamFork2(unburstified.cmd.haltWhen(halt)) + io.output.cmd.arbitrationFrom(cmdFork.haltWhen(pendingRead.msb)) + io.output.cmd.addr := (cmdFork.address >> log2Up(liteDramParameter.dataWidth/8)).resized + io.output.cmd.we := cmdFork.isWrite + + if(bmbParameter.canWrite) { + val fifo = dataFork.throwWhen(dataFork.isRead).queue(wdataFifoSize) + io.output.wdata.arbitrationFrom(fifo) + io.output.wdata.data := fifo.data + io.output.wdata.we := fifo.mask + } else { + dataFork.ready := True + io.output.wdata.valid := False + io.output.wdata.data.assignDontCare() + io.output.wdata.we.assignDontCare() + } + + val cmdContext = Stream(Context()) + cmdContext.valid := unburstified.cmd.fire + cmdContext.context := unburstified.cmd.context + cmdContext.source := unburstified.cmd.source + cmdContext.isWrite := unburstified.cmd.isWrite + halt := !cmdContext.ready + + val rspContext = cmdContext.queue(rdataFifoSize) + val rdataFifo = io.output.rdata.queueLowLatency(rdataFifoSize, latency = 1) + + rdataFifo.ready := unburstified.rsp.fire && !rspContext.isWrite + rspContext.ready := unburstified.rsp.fire + unburstified.rsp.valid := rspContext.valid && (rspContext.isWrite || rdataFifo.valid) + unburstified.rsp.setSuccess() + unburstified.rsp.last := True + unburstified.rsp.source := rspContext.source + unburstified.rsp.context := rspContext.context + unburstified.rsp.data := rdataFifo.data + + + pendingRead := pendingRead + U(io.output.cmd.fire && !io.output.cmd.we) - U(rdataFifo.fire) +} + +object BmbToLiteDramTester extends App{ + import spinal.core.sim._ + SimConfig.withWave.compile(BmbToLiteDram( + bmbParameter = BmbParameter( + addressWidth = 20, + dataWidth = 32, + lengthWidth = 6, + sourceWidth = 4, + contextWidth = 
16 + ), + liteDramParameter = LiteDramNativeParameter( + addressWidth = 20, + dataWidth = 128 + ), + wdataFifoSize = 16, + rdataFifoSize = 16 + )).doSimUntilVoid(seed = 42){dut => + val tester = new BmbMemoryTester(dut.io.input, dut.clockDomain, rspCounterTarget = 3000) + dut.io.output.simSlave(tester.memory.memory, dut.clockDomain) } } @@ -120,15 +274,21 @@ case class VexRiscvLitexSmpCluster(p : VexRiscvLitexSmpClusterParameter, cluster.io.timerInterrupts <> B(clint.harts.map(_.timerInterrupt)) cluster.io.softwareInterrupts <> B(clint.harts.map(_.softwareInterrupt)) - - val dBusDecoder = BmbDecoderOutOfOrder( + //TODO +// val dBusDecoder = BmbDecoderOutOfOrder( +// p = cluster.io.dMem.p, +// mappings = Seq(DefaultMapping, p.liteDramMapping), +// capabilities = Seq(cluster.io.dMem.p, cluster.io.dMem.p), +// pendingRspTransactionMax = 32 +// ) + val dBusDecoder = BmbDecoder( p = cluster.io.dMem.p, mappings = Seq(DefaultMapping, p.liteDramMapping), capabilities = Seq(cluster.io.dMem.p, cluster.io.dMem.p), - pendingRspTransactionMax = 32 + pendingMax = 31 ) dBusDecoder.io.input << cluster.io.dMem.pipelined(cmdValid = true, cmdReady = true, rspValid = true) - io.dMem.fromBmb(dBusDecoder.io.outputs(1)) + val dMemBridge = io.dMem.fromBmb(dBusDecoder.io.outputs(1), wdataFifoSize = 32, rdataFifoSize = 32) val iBusArbiterParameter = cluster.iBusParameter.copy(sourceWidth = log2Up(cpuCount)) val iBusArbiter = BmbArbiter( @@ -146,7 +306,7 @@ case class VexRiscvLitexSmpCluster(p : VexRiscvLitexSmpClusterParameter, pendingMax = 15 ) iBusDecoder.io.input << iBusArbiter.io.output - io.iMem.fromBmb(iBusDecoder.io.outputs(1)) + val iMemBridge = io.iMem.fromBmb(iBusDecoder.io.outputs(1), wdataFifoSize = 0, rdataFifoSize = 32) val peripheralAccessLength = Math.max(iBusDecoder.io.outputs(0).p.lengthWidth, dBusDecoder.io.outputs(0).p.lengthWidth) val peripheralArbiter = BmbArbiter( @@ -160,6 +320,7 @@ case class VexRiscvLitexSmpCluster(p : VexRiscvLitexSmpClusterParameter, val peripheralWishbone = peripheralArbiter.io.output.toWishbone() io.peripheral << peripheralWishbone } + object VexRiscvLitexSmpClusterGen extends App { val cpuCount = 4 val withStall = false @@ -183,7 +344,8 @@ object VexRiscvLitexSmpClusterGen extends App { debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")) ) - SpinalVerilog(Bench.compressIo(dutGen)) +// SpinalVerilog(Bench.compressIo(dutGen)) + SpinalVerilog(dutGen) } @@ -194,7 +356,6 @@ object VexRiscvLitexSmpClusterOpenSbi extends App{ val simConfig = SimConfig simConfig.withWave simConfig.allOptimisation - simConfig.addSimulatorFlag("--threads 1") val cpuCount = 4 val withStall = false @@ -204,46 +365,90 @@ object VexRiscvLitexSmpClusterOpenSbi extends App{ cpuConfigs = List.tabulate(cpuCount) { hartId => vexRiscvConfig( hartId = hartId, - ioRange = address => address.msb, - resetVector = 0 + ioRange = address => address(31 downto 28) === 0xF, + resetVector = 0x80000000l ) } ), liteDram = LiteDramNativeParameter(addressWidth = 32, dataWidth = 128), - liteDramMapping = SizeMapping(0x40000000l, 0x40000000l) + liteDramMapping = SizeMapping(0x80000000l, 0x70000000l) ) - def dutGen = VexRiscvLitexSmpCluster( - p = parameter, - debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")) - ) + def dutGen = { + val top = VexRiscvLitexSmpCluster( + p = parameter, + debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")) + ) + top.rework{ + top.io.clint.setAsDirectionLess.allowDirectionLessIo + 
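// This rework turns the CLINT and peripheral Wishbone ports into directionless internal signals
// (the peripheral one additionally made simPublic), routes the 0xF0010000-0xF0020000 window to
// the CLINT, and acknowledges everything else immediately so the testbench's onSamplings callback
// below can model the console (putchar at 0xF0000000, non-blocking getchar at 0xF0000004).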
top.io.peripheral.setAsDirectionLess.allowDirectionLessIo.simPublic() + + val hit = (top.io.peripheral.ADR <<2 >= 0xF0010000l && top.io.peripheral.ADR<<2 < 0xF0020000l) + top.io.clint.CYC := top.io.peripheral.CYC && hit + top.io.clint.STB := top.io.peripheral.STB + top.io.clint.WE := top.io.peripheral.WE + top.io.clint.ADR := top.io.peripheral.ADR.resized + top.io.clint.DAT_MOSI := top.io.peripheral.DAT_MOSI + top.io.peripheral.DAT_MISO := top.io.clint.DAT_MISO + top.io.peripheral.ACK := top.io.peripheral.CYC && (!hit || top.io.clint.ACK) + top.io.peripheral.ERR := False + + top.dMemBridge.unburstified.cmd.simPublic() + } + top + } simConfig.compile(dutGen).doSimUntilVoid(seed = 42){dut => - // dut.clockDomain.forkSimSpeedPrinter(1.0) -// VexRiscvSmpClusterTestInfrastructure.init(dut) -// val ram = VexRiscvSmpClusterTestInfrastructure.ram(dut, withStall) - // ram.memory.loadBin(0x80000000l, "../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_payload.bin") -// ram.memory.loadBin(0x80000000l, "../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_jump.bin") -// ram.memory.loadBin(0xC0000000l, "../buildroot/output/images/Image") -// ram.memory.loadBin(0xC1000000l, "../buildroot/output/images/dtb") -// ram.memory.loadBin(0xC2000000l, "../buildroot/output/images/rootfs.cpio") + dut.clockDomain.forkStimulus(10) + fork { + dut.debugClockDomain.resetSim #= false + sleep (0) + dut.debugClockDomain.resetSim #= true + sleep (10) + dut.debugClockDomain.resetSim #= false + } - // fork{ - // disableSimWave() - // val atMs = 130 - // val durationMs = 15 - // sleep(atMs*1000000) - // enableSimWave() - // println("** enableSimWave **") - // sleep(durationMs*1000000) - // println("** disableSimWave **") - // while(true) { - // disableSimWave() - // sleep(100000 * 10) - // enableSimWave() - // sleep( 100 * 10) - // } - //// simSuccess() - // } + + val ram = SparseMemory() + ram.loadBin(0x80000000l, "../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_jump.bin") + ram.loadBin(0xC0000000l, "../buildroot/output/images/Image") + ram.loadBin(0xC1000000l, "../buildroot/output/images/dtb") + ram.loadBin(0xC2000000l, "../buildroot/output/images/rootfs.cpio") + + + dut.io.iMem.simSlave(ram, dut.clockDomain) + dut.io.dMem.simSlave(ram, dut.clockDomain, dut.dMemBridge.unburstified) + + dut.io.externalInterrupts #= 0 + dut.io.externalSupervisorInterrupts #= 0 + + dut.clockDomain.onSamplings{ + if(dut.io.peripheral.CYC.toBoolean){ + (dut.io.peripheral.ADR.toLong << 2) match { + case 0xF0000000l => print(dut.io.peripheral.DAT_MOSI.toLong.toChar) + case 0xF0000004l => dut.io.peripheral.DAT_MISO #= (if(System.in.available() != 0) System.in.read() else 0xFFFFFFFFl) + case _ => + } +// println(f"${dut.io.peripheral.ADR.toLong}%x") + } + } + +// fork{ +// disableSimWave() +// val atMs = 8 +// val durationMs = 3 +// sleep(atMs*1000000) +// enableSimWave() +// println("** enableSimWave **") +// sleep(durationMs*1000000) +// println("** disableSimWave **") +// while(true) { +// disableSimWave() +// sleep(100000 * 10) +// enableSimWave() +// sleep( 100 * 10) +// } +// // simSuccess() +// } fork{ while(true) { From 09ac23b78f708c752017ab4ca59e001234c513e2 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Fri, 1 May 2020 12:45:16 +0200 Subject: [PATCH 48/91] Fix SMP fence lock when 4 stages CPU --- src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala 
b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala index 1d5adbb..17c429c 100644 --- a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala +++ b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala @@ -314,7 +314,7 @@ class DBusCachedPlugin(val config : DataCacheConfig, } when(arbitration.isValid && (input(MEMORY_FENCE) || aquire)){ - memory.arbitration.haltByOther := True //Ensure that the fence affect the memory stage instruction by stoping it + mmuAndBufferStage.arbitration.haltByOther := True //Ensure that the fence affect the memory stage instruction by stoping it } } From f0745eb0d9b3c1854227a6b58a130f77e7644761 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Sat, 2 May 2020 23:44:27 +0200 Subject: [PATCH 49/91] update SMP line size to 64 bytes --- src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala index d5c0bcf..92e3976 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -178,7 +178,7 @@ object VexRiscvSmpClusterGen { relaxedMemoryTranslationRegister = true, config = new DataCacheConfig( cacheSize = 4096*1, - bytePerLine = 32, + bytePerLine = 64, wayCount = 1, addressWidth = 32, cpuDataWidth = 32, From 93b386e16ee6b31b4247979ead63e5a0dc82cbc1 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Sat, 2 May 2020 23:44:58 +0200 Subject: [PATCH 50/91] litex smp cluster now use OO decoder --- .../demo/smp/VexRiscvSmpLitexCluster.scala | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala index 170b584..c2c287a 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala @@ -274,19 +274,18 @@ case class VexRiscvLitexSmpCluster(p : VexRiscvLitexSmpClusterParameter, cluster.io.timerInterrupts <> B(clint.harts.map(_.timerInterrupt)) cluster.io.softwareInterrupts <> B(clint.harts.map(_.softwareInterrupt)) - //TODO -// val dBusDecoder = BmbDecoderOutOfOrder( -// p = cluster.io.dMem.p, -// mappings = Seq(DefaultMapping, p.liteDramMapping), -// capabilities = Seq(cluster.io.dMem.p, cluster.io.dMem.p), -// pendingRspTransactionMax = 32 -// ) - val dBusDecoder = BmbDecoder( + val dBusDecoder = BmbDecoderOutOfOrder( p = cluster.io.dMem.p, mappings = Seq(DefaultMapping, p.liteDramMapping), capabilities = Seq(cluster.io.dMem.p, cluster.io.dMem.p), - pendingMax = 31 + pendingRspTransactionMax = 32 ) +// val dBusDecoder = BmbDecoderOut( +// p = cluster.io.dMem.p, +// mappings = Seq(DefaultMapping, p.liteDramMapping), +// capabilities = Seq(cluster.io.dMem.p, cluster.io.dMem.p), +// pendingMax = 31 +// ) dBusDecoder.io.input << cluster.io.dMem.pipelined(cmdValid = true, cmdReady = true, rspValid = true) val dMemBridge = io.dMem.fromBmb(dBusDecoder.io.outputs(1), wdataFifoSize = 32, rdataFifoSize = 32) From b0f7f37ac8b3ca9221e1d176b8d1d893b3cbd9f3 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Mon, 4 May 2020 12:54:16 +0200 Subject: [PATCH 51/91] D$ now support memDataWidth > 32 --- src/main/scala/vexriscv/TestsWorkspace.scala | 8 +-- src/main/scala/vexriscv/ip/DataCache.scala | 60 +++++++++++-------- src/test/cpp/regression/main.cpp | 45 +++++++++++--- src/test/cpp/regression/makefile | 2 + 
.../vexriscv/TestIndividualFeatures.scala | 49 ++++++++------- 5 files changed, 104 insertions(+), 60 deletions(-) diff --git a/src/main/scala/vexriscv/TestsWorkspace.scala b/src/main/scala/vexriscv/TestsWorkspace.scala index 04ceda3..40f8b33 100644 --- a/src/main/scala/vexriscv/TestsWorkspace.scala +++ b/src/main/scala/vexriscv/TestsWorkspace.scala @@ -27,7 +27,7 @@ import spinal.lib.bus.avalon.AvalonMM import spinal.lib.eda.altera.{InterruptReceiverTag, ResetEmitterTag} -//make clean all SEED=42 MMU=no STOP_ON_ERROR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes SUPERVISOR=yes REDO=0 DHRYSTONE=no LRSC=yes AMO=yes LINUX_REGRESSION=yes TRACE=yes TRACE_START=9546629800l FLOW_INFO=ye +// make clean all SEED=42 MMU=no STOP_ON_ERROR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes SUPERVISOR=yes REDO=1 DHRYSTONE=yes LRSC=yes AMO=yes LINUX_REGRESSION=yes TRACE=yes TRACE_START=1000000000 FLOW_INFO=ye IBUS_DATA_WIDTH=128 DBUS_DATA_WIDTH=128 object TestsWorkspace { def main(args: Array[String]) { def configFull = { @@ -60,7 +60,7 @@ object TestsWorkspace { injectorStage = false, config = InstructionCacheConfig( cacheSize = 4096*1, - bytePerLine = 32, + bytePerLine = 64, wayCount = 1, addressWidth = 32, cpuDataWidth = 32, @@ -92,11 +92,11 @@ object TestsWorkspace { dBusRspSlavePipe = true, config = new DataCacheConfig( cacheSize = 4096*1, - bytePerLine = 32, + bytePerLine = 64, wayCount = 1, addressWidth = 32, cpuDataWidth = 32, - memDataWidth = 32, + memDataWidth = 128, catchAccessError = true, catchIllegal = true, catchUnaligned = true, diff --git a/src/main/scala/vexriscv/ip/DataCache.scala b/src/main/scala/vexriscv/ip/DataCache.scala index 2e33999..9de6f09 100644 --- a/src/main/scala/vexriscv/ip/DataCache.scala +++ b/src/main/scala/vexriscv/ip/DataCache.scala @@ -34,7 +34,7 @@ case class DataCacheConfig(cacheSize : Int, assert(isPow2(pendingMax)) def withWriteResponse = withExclusive def burstSize = bytePerLine*8/memDataWidth - val burstLength = bytePerLine/(memDataWidth/8) + val burstLength = bytePerLine/(cpuDataWidth/8) def catchSomething = catchUnaligned || catchIllegal || catchAccessError def withInternalAmo = withAmo && !withExclusive def withInternalLrSc = withLrSc && !withExclusive @@ -196,8 +196,8 @@ case class DataCacheMemCmd(p : DataCacheConfig) extends Bundle{ val wr = Bool val uncached = Bool val address = UInt(p.addressWidth bit) - val data = Bits(p.memDataWidth bits) - val mask = Bits(p.memDataWidth/8 bits) + val data = Bits(p.cpuDataWidth bits) + val mask = Bits(p.cpuDataWidth/8 bits) val length = UInt(log2Up(p.burstLength) bits) val exclusive = p.withExclusive generate Bool() val last = Bool @@ -424,7 +424,6 @@ object DataCacheExternalAmoStates extends SpinalEnum{ //If external amo, mem rsp should stay class DataCache(val p : DataCacheConfig) extends Component{ import p._ - assert(cpuDataWidth == memDataWidth) val io = new Bundle{ val cpu = slave(DataCacheCpuBus(p)) @@ -434,19 +433,24 @@ class DataCache(val p : DataCacheConfig) extends Component{ val haltCpu = False val lineWidth = bytePerLine*8 val lineCount = cacheSize/bytePerLine - val wordWidth = Math.max(memDataWidth,cpuDataWidth) + val wordWidth = cpuDataWidth val wordWidthLog2 = log2Up(wordWidth) val wordPerLine = lineWidth/wordWidth val bytePerWord = wordWidth/8 val wayLineCount = lineCount/wayCount val wayLineLog2 = log2Up(wayLineCount) val wayWordCount = wayLineCount * wordPerLine + val memWordPerLine = lineWidth/memDataWidth val memTransactionPerLine = p.bytePerLine / (p.memDataWidth/8) + val bytePerMemWord = memDataWidth/8 + 
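// For the SMP configuration used in this series (cacheSize = 4096, bytePerLine = 64, wayCount = 1,
// cpuDataWidth = 32, memDataWidth = 128) these constants evaluate to: lineCount = 64,
// wordPerLine = 16, memWordPerLine = 4, bytePerWord = 4, bytePerMemWord = 16; a refill is thus
// burstSize = 4 beats of 128 bits, and burstLength = 16 CPU words per line.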
val wayMemWordCount = wayLineCount * memWordPerLine val tagRange = addressWidth-1 downto log2Up(wayLineCount*bytePerLine) val lineRange = tagRange.low-1 downto log2Up(bytePerLine) - val wordRange = log2Up(bytePerLine)-1 downto log2Up(bytePerWord) + val cpuWordRange = log2Up(bytePerLine)-1 downto log2Up(bytePerWord) + val memWordRange = log2Up(bytePerLine)-1 downto log2Up(bytePerMemWord) val hitRange = tagRange.high downto lineRange.low + val memWordToCpuWordRange = log2Up(bytePerMemWord)-1 downto log2Up(bytePerWord) class LineInfo() extends Bundle{ @@ -464,23 +468,24 @@ class DataCache(val p : DataCacheConfig) extends Component{ val tagsWriteLastCmd = RegNext(tagsWriteCmd) - val dataReadCmd = Flow(UInt(log2Up(wayWordCount) bits)) + val dataReadCmd = Flow(UInt(log2Up(wayMemWordCount) bits)) val dataWriteCmd = Flow(new Bundle{ val way = Bits(wayCount bits) - val address = UInt(log2Up(wayWordCount) bits) - val data = Bits(wordWidth bits) - val mask = Bits(wordWidth/8 bits) + val address = UInt(log2Up(wayMemWordCount) bits) + val data = Bits(memDataWidth bits) + val mask = Bits(memDataWidth/8 bits) }) - val ways = for(i <- 0 until wayCount) yield new Area{ val tags = Mem(new LineInfo(), wayLineCount) - val data = Mem(Bits(wordWidth bit), wayWordCount) + val data = Mem(Bits(memDataWidth bit), wayMemWordCount) //Reads val tagsReadRsp = tags.readSync(tagsReadCmd.payload, tagsReadCmd.valid && !io.cpu.memory.isStuck) - val dataReadRsp = data.readSync(dataReadCmd.payload, dataReadCmd.valid && !io.cpu.memory.isStuck) + val dataReadRspMem = data.readSync(dataReadCmd.payload, dataReadCmd.valid && !io.cpu.memory.isStuck) + val dataReadRspSel = if(mergeExecuteMemory) io.cpu.writeBack.address else io.cpu.memory.address + val dataReadRsp = dataReadRspMem.subdivideIn(cpuDataWidth bits).read(dataReadRspSel(memWordToCpuWordRange)) val tagsInvReadRsp = withInvalidate generate tags.readSync(tagsInvReadCmd.payload, tagsInvReadCmd.valid) @@ -511,13 +516,15 @@ class DataCache(val p : DataCacheConfig) extends Component{ tagsReadCmd.valid := True dataReadCmd.valid := True tagsReadCmd.payload := io.cpu.execute.address(lineRange) - dataReadCmd.payload := io.cpu.execute.address(lineRange.high downto wordRange.low) + dataReadCmd.payload := io.cpu.execute.address(lineRange.high downto memWordRange.low) } def collisionProcess(readAddress : UInt, readMask : Bits): Bits ={ val ret = Bits(wayCount bits) + val readAddressAligned = (readAddress >> log2Up(memDataWidth/cpuDataWidth)) + val dataWriteMaskAligned = dataWriteCmd.mask.subdivideIn(memDataWidth/cpuDataWidth slices).read(readAddress(log2Up(memDataWidth/cpuDataWidth)-1 downto 0)) for(i <- 0 until wayCount){ - ret(i) := dataWriteCmd.valid && dataWriteCmd.way(i) && dataWriteCmd.address === readAddress && (readMask & dataWriteCmd.mask) =/= 0 + ret(i) := dataWriteCmd.valid && dataWriteCmd.way(i) && dataWriteCmd.address === readAddressAligned && (readMask & dataWriteMaskAligned) =/= 0 } ret } @@ -600,7 +607,7 @@ class DataCache(val p : DataCacheConfig) extends Component{ U(1) -> B"0011", default -> B"1111" ) |<< io.cpu.execute.address(1 downto 0) - val dataColisions = collisionProcess(io.cpu.execute.address(lineRange.high downto wordRange.low), mask) + val dataColisions = collisionProcess(io.cpu.execute.address(lineRange.high downto cpuWordRange.low), mask) val wayInvalidate = B(0, wayCount bits) //Used if invalidate enabled val isAmo = if(withAmo) io.cpu.execute.isAmo else False @@ -643,7 +650,7 @@ class DataCache(val p : DataCacheConfig) extends Component{ 
stagePipe(stage0.dataColisions) } else { //Assume the writeback stage will never be unstall memory acces while memory stage is stalled - stagePipe(stage0.dataColisions) | collisionProcess(io.cpu.memory.address(lineRange.high downto wordRange.low), mask) + stagePipe(stage0.dataColisions) | collisionProcess(io.cpu.memory.address(lineRange.high downto cpuWordRange.low), mask) } } @@ -667,7 +674,7 @@ class DataCache(val p : DataCacheConfig) extends Component{ //Loader interface val loaderValid = False - + val ioMemRspMuxed = io.mem.rsp.data.subdivideIn(cpuDataWidth bits).read(io.cpu.writeBack.address(memWordToCpuWordRange)) io.cpu.writeBack.haltIt := io.cpu.writeBack.isValid @@ -717,7 +724,7 @@ class DataCache(val p : DataCacheConfig) extends Component{ import DataCacheExternalAmoStates._ val amo = withAmo generate new Area{ def rf = request.data - def mem = if(withInternalAmo) dataMux else io.mem.rsp.data + def mem = if(withInternalAmo) dataMux else ioMemRspMuxed val compare = request.amoCtrl.alu.msb val unsigned = request.amoCtrl.alu(2 downto 1) === B"11" val addSub = (rf.asSInt + Mux(compare, ~mem, mem).asSInt + Mux(compare, S(1), S(0))).asBits @@ -748,9 +755,10 @@ class DataCache(val p : DataCacheConfig) extends Component{ val cpuWriteToCache = False when(cpuWriteToCache){ dataWriteCmd.valid setWhen(request.wr && waysHit) - dataWriteCmd.address := mmuRsp.physicalAddress(lineRange.high downto wordRange.low) - dataWriteCmd.data := requestDataBypass - dataWriteCmd.mask := mask + dataWriteCmd.address := mmuRsp.physicalAddress(lineRange.high downto memWordRange.low) + dataWriteCmd.data.subdivideIn(cpuDataWidth bits).foreach(_ := requestDataBypass) + dataWriteCmd.mask := 0 + dataWriteCmd.mask.subdivideIn(cpuDataWidth/8 bits).write(io.cpu.writeBack.address(memWordToCpuWordRange), mask) dataWriteCmd.way := waysHits } @@ -761,7 +769,7 @@ class DataCache(val p : DataCacheConfig) extends Component{ io.cpu.writeBack.isWrite := request.wr io.mem.cmd.valid := False - io.mem.cmd.address := mmuRsp.physicalAddress(tagRange.high downto wordRange.low) @@ U(0, wordRange.low bit) + io.mem.cmd.address := mmuRsp.physicalAddress(tagRange.high downto cpuWordRange.low) @@ U(0, cpuWordRange.low bits) io.mem.cmd.length := 0 io.mem.cmd.last := True io.mem.cmd.wr := request.wr @@ -825,7 +833,7 @@ class DataCache(val p : DataCacheConfig) extends Component{ //Write through io.mem.cmd.valid setWhen(request.wr) - io.mem.cmd.address := mmuRsp.physicalAddress(tagRange.high downto wordRange.low) @@ U(0, wordRange.low bit) + io.mem.cmd.address := mmuRsp.physicalAddress(tagRange.high downto cpuWordRange.low) @@ U(0, cpuWordRange.low bits) io.mem.cmd.length := 0 io.cpu.writeBack.haltIt clearWhen(!request.wr || io.mem.cmd.ready) @@ -861,10 +869,10 @@ class DataCache(val p : DataCacheConfig) extends Component{ } when(bypassCache){ - io.cpu.writeBack.data := io.mem.rsp.data + io.cpu.writeBack.data := ioMemRspMuxed if(catchAccessError) io.cpu.writeBack.accessError := io.mem.rsp.valid && io.mem.rsp.error } otherwise { - io.cpu.writeBack.data := dataMux + io.cpu.writeBack.data := dataMux if(catchAccessError) io.cpu.writeBack.accessError := (waysHits & B(tagsReadRsp.map(_.error))) =/= 0 } diff --git a/src/test/cpp/regression/main.cpp b/src/test/cpp/regression/main.cpp index 5fa9635..e0f50ab 100644 --- a/src/test/cpp/regression/main.cpp +++ b/src/test/cpp/regression/main.cpp @@ -2028,7 +2028,7 @@ public: #endif error = false; for(int idx = 0;idx < IBUS_DATA_WIDTH/32;idx++){ - bool localError; + bool localError = false; 
ws->iBusAccess(address+idx*4,((uint32_t*)&top->iBus_rsp_payload_data)+idx,&localError); error |= localError; } @@ -2342,7 +2342,7 @@ public: #include struct DBusCachedTask{ - uint32_t data; + char data[DBUS_DATA_WIDTH/8]; bool error; bool last; bool exclusive; @@ -2386,21 +2386,43 @@ public: bool error; ws->dBusAccess(top->dBus_cmd_payload_address,1,2,top->dBus_cmd_payload_mask,&top->dBus_cmd_payload_data,&error); #else - bool cancel = false; + bool cancel = false, error = false; if(top->dBus_cmd_payload_exclusive){ bool hit = reservationValid && reservationAddress == top->dBus_cmd_payload_address; rsp.exclusive = hit; cancel = !hit; reservationValid = false; } - if(!cancel) ws->dBusAccess(top->dBus_cmd_payload_address,1,2,top->dBus_cmd_payload_mask,&top->dBus_cmd_payload_data,&rsp.error); + if(!cancel) { + for(int idx = 0;idx < 1;idx++){ + bool localError = false; + ws->dBusAccess(top->dBus_cmd_payload_address+idx*4,1,2,top->dBus_cmd_payload_mask >> idx*4,((uint32_t*)&top->dBus_cmd_payload_data)+idx, &localError); + error |= localError; + + //printf("%d ", (int)localError); + } + } + + // printf("%x %d\n", top->dBus_cmd_payload_address, (int)error); rsp.last = true; + rsp.error = error; rsps.push(rsp); #endif } else { - for(int beat = 0;beat <= top->dBus_cmd_payload_length;beat++){ - ws->dBusAccess(top->dBus_cmd_payload_address + beat * 4,0,2,0,&rsp.data,&rsp.error); - rsp.last = beat == top->dBus_cmd_payload_length; + bool error = false; + uint32_t beatCount = top->dBus_cmd_payload_length*32/DBUS_DATA_WIDTH; + for(int beat = 0;beat <= beatCount;beat++){ + if(top->dBus_cmd_payload_length == 0){ + uint32_t sel = (top->dBus_cmd_payload_address >> 2) & (DBUS_DATA_WIDTH/32-1); + ws->dBusAccess(top->dBus_cmd_payload_address,0,2,0,((uint32_t*)rsp.data) + sel,&error); + } else { + for(int idx = 0;idx < DBUS_DATA_WIDTH/32;idx++){ + bool localError = false; + ws->dBusAccess(top->dBus_cmd_payload_address + beat * DBUS_DATA_WIDTH/8 + idx*4,0,2,0,((uint32_t*)rsp.data)+idx, &localError); + error |= localError; + } + } + rsp.last = beat == beatCount; #ifdef DBUS_EXCLUSIVE if(top->dBus_cmd_payload_exclusive){ rsp.exclusive = true; @@ -2408,6 +2430,7 @@ public: reservationAddress = top->dBus_cmd_payload_address; } #endif + rsp.error = error; rsps.push(rsp); } @@ -2434,14 +2457,18 @@ public: rsps.pop(); top->dBus_rsp_valid = 1; top->dBus_rsp_payload_error = rsp.error; - top->dBus_rsp_payload_data = rsp.data; + for(int idx = 0;idx < DBUS_DATA_WIDTH/32;idx++){ + ((uint32_t*)&top->dBus_rsp_payload_data)[idx] = ((uint32_t*)rsp.data)[idx]; + } top->dBus_rsp_payload_last = rsp.last; #ifdef DBUS_EXCLUSIVE top->dBus_rsp_payload_exclusive = rsp.exclusive; #endif } else{ top->dBus_rsp_valid = 0; - top->dBus_rsp_payload_data = VL_RANDOM_I(32); + for(int idx = 0;idx < DBUS_DATA_WIDTH/32;idx++){ + ((uint32_t*)&top->dBus_rsp_payload_data)[idx] = VL_RANDOM_I(32); + } top->dBus_rsp_payload_error = VL_RANDOM_I(1); top->dBus_rsp_payload_last = VL_RANDOM_I(1); #ifdef DBUS_EXCLUSIVE diff --git a/src/test/cpp/regression/makefile b/src/test/cpp/regression/makefile index 9836326..da525c5 100644 --- a/src/test/cpp/regression/makefile +++ b/src/test/cpp/regression/makefile @@ -5,6 +5,7 @@ IBUS?=CACHED IBUS_TC?=no IBUS_DATA_WIDTH?=32 DBUS?=CACHED +DBUS_DATA_WIDTH?=32 TRACE?=no TRACE_ACCESS?=no TRACE_START=0 @@ -46,6 +47,7 @@ WITH_USER_IO?=no ADDCFLAGS += -CFLAGS -DREGRESSION_PATH='\"$(REGRESSION_PATH)/\"' ADDCFLAGS += -CFLAGS -DIBUS_${IBUS} ADDCFLAGS += -CFLAGS -DIBUS_DATA_WIDTH=${IBUS_DATA_WIDTH} +ADDCFLAGS += -CFLAGS 
-DDBUS_DATA_WIDTH=${DBUS_DATA_WIDTH} ADDCFLAGS += -CFLAGS -DDBUS_${DBUS} ADDCFLAGS += -CFLAGS -DREDO=${REDO} diff --git a/src/test/scala/vexriscv/TestIndividualFeatures.scala b/src/test/scala/vexriscv/TestIndividualFeatures.scala index 72651b4..8a7ace1 100644 --- a/src/test/scala/vexriscv/TestIndividualFeatures.scala +++ b/src/test/scala/vexriscv/TestIndividualFeatures.scala @@ -1,7 +1,7 @@ package vexriscv import java.io.{File, OutputStream} -import java.util.concurrent.TimeUnit +import java.util.concurrent.{ForkJoinPool, TimeUnit} import org.apache.commons.io.FileUtils import org.scalatest.{BeforeAndAfterAll, FunSuite, ParallelTestExecution, Tag, Transformer} @@ -426,7 +426,8 @@ class DBusDimension extends VexRiscvDimension("DBus") { // override def isCompatibleWith(positions: Seq[ConfigPosition[VexRiscvConfig]]) = catchAll == positions.exists(_.isInstanceOf[CatchAllPosition]) } } else { - val bytePerLine = List(8,16,32,64)(r.nextInt(4)) + val memDataWidth = List(32,64,128)(r.nextInt(3)) + val bytePerLine = Math.max(memDataWidth/8, List(8,16,32,64)(r.nextInt(4))) var cacheSize = 0 var wayCount = 0 val withLrSc = catchAll @@ -441,8 +442,8 @@ class DBusDimension extends VexRiscvDimension("DBus") { cacheSize = 512 << r.nextInt(5) wayCount = 1 << r.nextInt(3) }while(cacheSize/wayCount < 512 || (catchAll && cacheSize/wayCount > 4096)) - new VexRiscvPosition("Cached" + "S" + cacheSize + "W" + wayCount + "BPL" + bytePerLine + (if(dBusCmdMasterPipe) "Cmp " else "") + (if(dBusCmdSlavePipe) "Csp " else "") + (if(dBusRspSlavePipe) "Rsp " else "") + (if(relaxedMemoryTranslationRegister) "Rmtr " else "") + (if(earlyWaysHits) "Ewh " else "") + (if(withAmo) "Amo " else "") + (if(withSmp) "Smp " else "")) { - override def testParam = "DBUS=CACHED " + (if(withLrSc) "LRSC=yes " else "") + (if(withAmo) "AMO=yes " else "") + (if(withSmp) "DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes " else "") + new VexRiscvPosition(s"Cached${memDataWidth}d" + "S" + cacheSize + "W" + wayCount + "BPL" + bytePerLine + (if(dBusCmdMasterPipe) "Cmp " else "") + (if(dBusCmdSlavePipe) "Csp " else "") + (if(dBusRspSlavePipe) "Rsp " else "") + (if(relaxedMemoryTranslationRegister) "Rmtr " else "") + (if(earlyWaysHits) "Ewh " else "") + (if(withAmo) "Amo " else "") + (if(withSmp) "Smp " else "")) { + override def testParam = s"DBUS=CACHED DBUS_DATA_WIDTH=$memDataWidth " + (if(withLrSc) "LRSC=yes " else "") + (if(withAmo) "AMO=yes " else "") + (if(withSmp) "DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes " else "") override def applyOn(config: VexRiscvConfig): Unit = { config.plugins += new DBusCachedPlugin( @@ -452,7 +453,7 @@ class DBusDimension extends VexRiscvDimension("DBus") { wayCount = wayCount, addressWidth = 32, cpuDataWidth = 32, - memDataWidth = 32, + memDataWidth = memDataWidth, catchAccessError = catchAll, catchIllegal = catchAll, catchUnaligned = catchAll, @@ -574,8 +575,14 @@ object PlayFuture extends App{ Thread.sleep(8000) } -class MultithreadedFunSuite extends FunSuite { - implicit val ec = ExecutionContext.global +class MultithreadedFunSuite(threadCount : Int) extends FunSuite { + val finalThreadCount = if(threadCount > 0) threadCount else { + val systemInfo = new oshi.SystemInfo + systemInfo.getHardware.getProcessor.getLogicalProcessorCount + } + implicit val ec = ExecutionContext.fromExecutorService( + new ForkJoinPool(finalThreadCount, ForkJoinPool.defaultForkJoinWorkerThreadFactory, null, true) + ) class Job(body : => Unit){ val originalOutput = Console.out val buffer = mutable.Queue[Char]() @@ -612,7 +619,7 @@ class 
MultithreadedFunSuite extends FunSuite { } -class FunTestPara extends MultithreadedFunSuite{ +class FunTestPara extends MultithreadedFunSuite(3){ def createTest(name : String): Unit ={ test(name){ for(i <- 0 to 4) { @@ -624,20 +631,20 @@ class FunTestPara extends MultithreadedFunSuite{ (0 to 80).map(_.toString).foreach(createTest) } -class FunTestPlay extends FunSuite { - def createTest(name : String): Unit ={ - test(name){ - Thread.sleep(500) - for(i <- 0 to 4) { - println(s"$name $i") - Thread.sleep(500) - } - } - } - (0 to 80).map(_.toString).foreach(createTest) -} +//class FunTestPlay extends FunSuite { +// def createTest(name : String): Unit ={ +// test(name){ +// Thread.sleep(500) +// for(i <- 0 to 4) { +// println(s"$name $i") +// Thread.sleep(500) +// } +// } +// } +// (0 to 80).map(_.toString).foreach(createTest) +//} -class TestIndividualFeatures extends MultithreadedFunSuite { +class TestIndividualFeatures extends MultithreadedFunSuite(sys.env.getOrElse("VEXRISCV_REGRESSION_THREAD_COUNT", "0").toInt) { val testCount = sys.env.getOrElse("VEXRISCV_REGRESSION_CONFIG_COUNT", "100").toInt val seed = sys.env.getOrElse("VEXRISCV_REGRESSION_SEED", Random.nextLong().toString).toLong val testId : Set[Int] = sys.env.get("VEXRISCV_REGRESSION_TEST_ID") match { From c16f2ed7879877c4fe75a6ac658ab0ed6bf2771d Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Mon, 4 May 2020 12:54:28 +0200 Subject: [PATCH 52/91] Add probes in SmpCluster sim --- .../demo/smp/VexRiscvSmpCluster.scala | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala index 92e3976..7f165f2 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -510,6 +510,39 @@ object VexRiscvSmpClusterOpenSbi extends App{ ram.memory.loadBin(0xC1000000l, "../buildroot/output/images/dtb") ram.memory.loadBin(0xC2000000l, "../buildroot/output/images/rootfs.cpio") + import spinal.core.sim._ + var iMemReadBytes, dMemReadBytes, dMemWriteBytes = 0l + var reportTimer = 0 + var reportCycle = 0 + + import java.io._ + val csv = new PrintWriter(new File("bench.csv" )) + csv.write(s"reportCycle,iMemReadBytes,dMemReadBytes,dMemWriteBytes\n") + dut.clockDomain.onSamplings{ + dut.io.iMems.foreach{ iMem => + if(iMem.cmd.valid.toBoolean && iMem.cmd.ready.toBoolean){ + iMemReadBytes += iMem.cmd.length.toInt+1 + } + } + if(dut.io.dMem.cmd.valid.toBoolean && dut.io.dMem.cmd.ready.toBoolean){ + if(dut.io.dMem.cmd.opcode.toInt == Bmb.Cmd.Opcode.WRITE){ + dMemWriteBytes += dut.io.dMem.cmd.length.toInt+1 + }else { + dMemReadBytes += dut.io.dMem.cmd.length.toInt+1 + } + } + reportTimer = reportTimer + 1 + reportCycle = reportCycle + 1 + if(reportTimer == 100000){ + reportTimer = 0 +// println(f"\n** c=${reportCycle} ir=${iMemReadBytes*1e-6}%5.2f dr=${dMemReadBytes*1e-6}%5.2f dw=${dMemWriteBytes*1e-6}%5.2f **\n") + + csv.write(s"$reportCycle,$iMemReadBytes,$dMemReadBytes,$dMemWriteBytes\n") + csv.flush() + } + } + + // fork{ // disableSimWave() // val atMs = 130 From 09724e907ba984660b1d3a1cedb8ac5ef05498ee Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Tue, 5 May 2020 00:32:59 +0200 Subject: [PATCH 53/91] play around with CSR synthesis impact on design size --- .../scala/vexriscv/demo/SynthesisBench.scala | 102 +++++++++++++++++- .../vexriscv/TestIndividualFeatures.scala | 3 +- 2 files changed, 101 insertions(+), 4 deletions(-) diff --git 
a/src/main/scala/vexriscv/demo/SynthesisBench.scala b/src/main/scala/vexriscv/demo/SynthesisBench.scala index 94d0055..b6e9f2f 100644 --- a/src/main/scala/vexriscv/demo/SynthesisBench.scala +++ b/src/main/scala/vexriscv/demo/SynthesisBench.scala @@ -6,8 +6,9 @@ import spinal.lib.eda.bench._ import spinal.lib.eda.icestorm.IcestormStdTargets import spinal.lib.eda.xilinx.VivadoFlow import spinal.lib.io.InOutWrapper -import vexriscv.VexRiscv -import vexriscv.plugin.DecoderSimplePlugin +import vexriscv.plugin.CsrAccess.{READ_ONLY, READ_WRITE, WRITE_ONLY} +import vexriscv.{VexRiscv, VexRiscvConfig, plugin} +import vexriscv.plugin.{BranchPlugin, CsrPlugin, CsrPluginConfig, DBusSimplePlugin, DecoderSimplePlugin, FullBarrelShifterPlugin, HazardSimplePlugin, IBusSimplePlugin, IntAluPlugin, LightShifterPlugin, NONE, RegFilePlugin, SrcPlugin, YamlPlugin} import scala.collection.mutable.ArrayBuffer import scala.util.Random @@ -153,6 +154,7 @@ object VexRiscvSynthesisBench { } + // val rtls = List(twoStage, twoStageBarell, twoStageMulDiv, twoStageAll, smallestNoCsr, smallest, smallAndProductive, smallAndProductiveWithICache, fullNoMmuNoCache, noCacheNoMmuMaxPerf, fullNoMmuMaxPerf, fullNoMmu, full, linuxBalanced, linuxBalancedSmp) val rtls = List(linuxBalanced, linuxBalancedSmp) // val rtls = List(smallest) @@ -278,4 +280,100 @@ object AllSynthesisBench { MuraxSynthesisBench.main(args) } +} + + + +object VexRiscvCustomSynthesisBench { + def main(args: Array[String]) { + + + def gen(csr : CsrPlugin) = new VexRiscv( + config = VexRiscvConfig( + plugins = List( + new IBusSimplePlugin( + resetVector = 0x80000000l, + cmdForkOnSecondStage = false, + cmdForkPersistence = false, + prediction = NONE, + catchAccessFault = false, + compressedGen = false + ), + new DBusSimplePlugin( + catchAddressMisaligned = false, + catchAccessFault = false + ), + new DecoderSimplePlugin( + catchIllegalInstruction = false + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + zeroBoot = false + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false, + executeInsertion = true + ), + csr, + new FullBarrelShifterPlugin(), + new HazardSimplePlugin( + bypassExecute = true, + bypassMemory = true, + bypassWriteBack = true, + bypassWriteBackBuffer = true, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + new BranchPlugin( + earlyBranch = false, + catchAddressMisaligned = false + ), + new YamlPlugin("cpu0.yaml") + ) + ) + ) + + + val fixedMtvec = new Rtl { + override def getName(): String = "Fixed MTVEC" + override def getRtlPath(): String = "fixedMtvec.v" + SpinalVerilog(gen(new CsrPlugin(CsrPluginConfig.smallest(0x80000000l))).setDefinitionName(getRtlPath().split("\\.").head)) + } + + val writeOnlyMtvec = new Rtl { + override def getName(): String = "write only MTVEC" + override def getRtlPath(): String = "woMtvec.v" + SpinalVerilog(gen(new CsrPlugin(CsrPluginConfig.smallest(null).copy(mtvecAccess = WRITE_ONLY))).setDefinitionName(getRtlPath().split("\\.").head)) + } + + val readWriteMtvec = new Rtl { + override def getName(): String = "read write MTVEC" + override def getRtlPath(): String = "wrMtvec.v" + SpinalVerilog(gen(new CsrPlugin(CsrPluginConfig.smallest(null).copy(mtvecAccess = READ_WRITE))).setDefinitionName(getRtlPath().split("\\.").head)) + } + + val fixedMtvecRoCounter = new Rtl { + override def getName(): String = "Fixed MTVEC, read only mcycle/minstret" + override def getRtlPath(): String = "fixedMtvecRoCounter.v" + SpinalVerilog(gen(new 
CsrPlugin(CsrPluginConfig.smallest(0x80000000l).copy(mcycleAccess = READ_ONLY, minstretAccess = READ_ONLY))).setDefinitionName(getRtlPath().split("\\.").head)) + } + + + val rwMtvecRoCounter = new Rtl { + override def getName(): String = "read write MTVEC, read only mcycle/minstret" + override def getRtlPath(): String = "readWriteMtvecRoCounter.v" + SpinalVerilog(gen(new CsrPlugin(CsrPluginConfig.smallest(null).copy(mtvecAccess = READ_WRITE, mcycleAccess = READ_ONLY, minstretAccess = READ_ONLY))).setDefinitionName(getRtlPath().split("\\.").head)) + } + + + // val rtls = List(twoStage, twoStageBarell, twoStageMulDiv, twoStageAll, smallestNoCsr, smallest, smallAndProductive, smallAndProductiveWithICache, fullNoMmuNoCache, noCacheNoMmuMaxPerf, fullNoMmuMaxPerf, fullNoMmu, full, linuxBalanced, linuxBalancedSmp) + val rtls = List(fixedMtvec, writeOnlyMtvec, readWriteMtvec,fixedMtvecRoCounter, rwMtvecRoCounter) + // val rtls = List(smallest) + val targets = XilinxStdTargets() ++ AlteraStdTargets() ++ IcestormStdTargets().take(1) + + // val targets = IcestormStdTargets() + Bench(rtls, targets) + } } \ No newline at end of file diff --git a/src/test/scala/vexriscv/TestIndividualFeatures.scala b/src/test/scala/vexriscv/TestIndividualFeatures.scala index 8a7ace1..eafd1d9 100644 --- a/src/test/scala/vexriscv/TestIndividualFeatures.scala +++ b/src/test/scala/vexriscv/TestIndividualFeatures.scala @@ -577,8 +577,7 @@ object PlayFuture extends App{ class MultithreadedFunSuite(threadCount : Int) extends FunSuite { val finalThreadCount = if(threadCount > 0) threadCount else { - val systemInfo = new oshi.SystemInfo - systemInfo.getHardware.getProcessor.getLogicalProcessorCount + new oshi.SystemInfo().getHardware.getProcessor.getLogicalProcessorCount } implicit val ec = ExecutionContext.fromExecutorService( new ForkJoinPool(finalThreadCount, ForkJoinPool.defaultForkJoinWorkerThreadFactory, null, true) From 8043feebd504a37b110060b5944cbf9bd7921608 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Wed, 6 May 2020 17:06:17 +0200 Subject: [PATCH 54/91] More VexRiscv smp cluster probes --- .../demo/smp/VexRiscvSmpCluster.scala | 59 +++++++++++++++---- 1 file changed, 49 insertions(+), 10 deletions(-) diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala index 7f165f2..2f694cc 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -8,7 +8,7 @@ import spinal.lib.bus.bmb.sim.BmbMemoryAgent import spinal.lib.bus.bmb.{Bmb, BmbArbiter, BmbDecoder, BmbExclusiveMonitor, BmbInvalidateMonitor, BmbParameter} import spinal.lib.com.jtag.Jtag import spinal.lib.com.jtag.sim.JtagTcp -import vexriscv.ip.{DataCacheAck, DataCacheConfig, DataCacheMemBus, InstructionCacheConfig} +import vexriscv.ip.{DataCacheAck, DataCacheConfig, DataCacheMemBus, InstructionCache, InstructionCacheConfig} import vexriscv.plugin.{BranchPlugin, CsrPlugin, CsrPluginConfig, DBusCachedPlugin, DBusSimplePlugin, DebugPlugin, DecoderSimplePlugin, FullBarrelShifterPlugin, HazardSimplePlugin, IBusCachedPlugin, IBusSimplePlugin, IntAluPlugin, MmuPlugin, MmuPortConfig, MulDivIterativePlugin, MulPlugin, RegFilePlugin, STATIC, SrcPlugin, YamlPlugin} import vexriscv.{Riscv, VexRiscv, VexRiscvConfig, plugin} @@ -491,10 +491,21 @@ object VexRiscvSmpClusterOpenSbi extends App{ simConfig.allOptimisation simConfig.addSimulatorFlag("--threads 1") - val cpuCount = 4 + val cpuCount = 1 val withStall = false - 
simConfig.workspaceName("rawr_4c").compile(VexRiscvSmpClusterGen.vexRiscvCluster(cpuCount, resetVector = 0x80000000l)).doSimUntilVoid(seed = 42){dut =>
+  def gen = {
+    val dut = VexRiscvSmpClusterGen.vexRiscvCluster(cpuCount, resetVector = 0x80000000l)
+    dut.cpus.foreach{cpu =>
+      cpu.core.children.foreach{
+        case cache : InstructionCache => cache.io.cpu.decode.simPublic()
+        case _ =>
+      }
+    }
+    dut
+  }
+
+  simConfig.workspaceName("rawr_4c").compile(gen).doSimUntilVoid(seed = 42){dut =>
 //    dut.clockDomain.forkSimSpeedPrinter(1.0)
     VexRiscvSmpClusterTestInfrastructure.init(dut)
     val ram = VexRiscvSmpClusterTestInfrastructure.ram(dut, withStall)
@@ -511,17 +522,39 @@ object VexRiscvSmpClusterOpenSbi extends App{
     ram.memory.loadBin(0xC2000000l, "../buildroot/output/images/rootfs.cpio")
     import spinal.core.sim._
-    var iMemReadBytes, dMemReadBytes, dMemWriteBytes = 0l
+    var iMemReadBytes, dMemReadBytes, dMemWriteBytes, iMemSequencial,iMemRequests = 0l
     var reportTimer = 0
     var reportCycle = 0
     import java.io._
     val csv = new PrintWriter(new File("bench.csv" ))
-    csv.write(s"reportCycle,iMemReadBytes,dMemReadBytes,dMemWriteBytes\n")
+    val iMemCtx = Array.tabulate(cpuCount)(i => new {
+      var sequencialPrediction = 0l
+      val cache = dut.cpus(i).core.children.find(_.isInstanceOf[InstructionCache]).head.asInstanceOf[InstructionCache].io.cpu.decode
+    })
+    csv.write(s"reportCycle,iMemReadBytes,dMemReadBytes,dMemWriteBytes,iMemRequests,iMemSequencial\n")
     dut.clockDomain.onSamplings{
-      dut.io.iMems.foreach{ iMem =>
-        if(iMem.cmd.valid.toBoolean && iMem.cmd.ready.toBoolean){
-          iMemReadBytes += iMem.cmd.length.toInt+1
+      for(i <- 0 until cpuCount; iMem = dut.io.iMems(i); ctx = iMemCtx(i)){
+//        if(iMem.cmd.valid.toBoolean && iMem.cmd.ready.toBoolean){
+//          val length = iMem.cmd.length.toInt + 1
+//          val address = iMem.cmd.address.toLong
+//          iMemReadBytes += length
+//          iMemRequests += 1
+//        }
+        if(ctx.cache.isValid.toBoolean && !ctx.cache.mmuRefilling.toBoolean && !ctx.cache.mmuException.toBoolean){
+          val address = ctx.cache.physicalAddress.toLong
+          val length = ctx.cache.p.bytePerLine.toLong
+          val mask = ~(length-1)
+          if(ctx.cache.cacheMiss.toBoolean) {
+            iMemReadBytes += length
+            iMemRequests += 1
+            if ((address & mask) == (ctx.sequencialPrediction & mask)) {
+              iMemSequencial += 1
+            }
+          }
+          if(!ctx.cache.isStuck.toBoolean) {
+            ctx.sequencialPrediction = address + length
+          }
         }
       }
       if(dut.io.dMem.cmd.valid.toBoolean && dut.io.dMem.cmd.ready.toBoolean){
@@ -533,12 +566,18 @@ object VexRiscvSmpClusterOpenSbi extends App{
       }
       reportTimer = reportTimer + 1
       reportCycle = reportCycle + 1
-      if(reportTimer == 100000){
+      if(reportTimer == 400000){
         reportTimer = 0
 //        println(f"\n** c=${reportCycle} ir=${iMemReadBytes*1e-6}%5.2f dr=${dMemReadBytes*1e-6}%5.2f dw=${dMemWriteBytes*1e-6}%5.2f **\n")
-        csv.write(s"$reportCycle,$iMemReadBytes,$dMemReadBytes,$dMemWriteBytes\n")
+        csv.write(s"$reportCycle,$iMemReadBytes,$dMemReadBytes,$dMemWriteBytes,$iMemRequests,$iMemSequencial\n")
         csv.flush()
+        reportCycle = 0
+        iMemReadBytes = 0
+        dMemReadBytes = 0
+        dMemWriteBytes = 0
+        iMemRequests = 0
+        iMemSequencial = 0
       }
     }

From ed4a89e4af4a9e70a72db3c4284218d120f2a46c Mon Sep 17 00:00:00 2001
From: Dolu1990
Date: Wed, 6 May 2020 17:06:45 +0200
Subject: [PATCH 55/91] more pipelining in Litex SMP cluster interconnect

---
 .../demo/smp/VexRiscvSmpLitexCluster.scala | 23 ++++++++++++-------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala
index c2c287a..bb02e60 100644
--- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala
+++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala
@@ -180,10 +180,11 @@ case class BmbToLiteDram(bmbParameter : BmbParameter,
   io.output.cmd.we := cmdFork.isWrite
   if(bmbParameter.canWrite) {
-    val fifo = dataFork.throwWhen(dataFork.isRead).queue(wdataFifoSize)
-    io.output.wdata.arbitrationFrom(fifo)
-    io.output.wdata.data := fifo.data
-    io.output.wdata.we := fifo.mask
+    val wData = Stream(LiteDramNativeWData(liteDramParameter))
+    wData.arbitrationFrom(dataFork.throwWhen(dataFork.isRead))
+    wData.data := dataFork.data
+    wData.we := dataFork.mask
+    io.output.wdata << wData.queue(wdataFifoSize)
   } else {
     dataFork.ready := True
     io.output.wdata.valid := False
@@ -305,7 +306,13 @@ case class VexRiscvLitexSmpCluster(p : VexRiscvLitexSmpClusterParameter,
     pendingMax = 15
   )
   iBusDecoder.io.input << iBusArbiter.io.output
-  val iMemBridge = io.iMem.fromBmb(iBusDecoder.io.outputs(1), wdataFifoSize = 0, rdataFifoSize = 32)
+
+  val iMem = LiteDramNative(p.liteDram)
+  val iMemBridge = iMem.fromBmb(iBusDecoder.io.outputs(1), wdataFifoSize = 0, rdataFifoSize = 32)
+  iMem.cmd >-> io.iMem.cmd
+  iMem.wdata >> io.iMem.wdata
+  iMem.rdata << io.iMem.rdata
+
   val peripheralAccessLength = Math.max(iBusDecoder.io.outputs(0).p.lengthWidth, dBusDecoder.io.outputs(0).p.lengthWidth)
   val peripheralArbiter = BmbArbiter(
@@ -316,7 +323,7 @@ case class VexRiscvLitexSmpCluster(p : VexRiscvLitexSmpClusterParameter,
   peripheralArbiter.io.inputs(0) << iBusDecoder.io.outputs(0).resize(dataWidth = 32).pipelined(cmdHalfRate = true, rspValid = true)
   peripheralArbiter.io.inputs(1) << dBusDecoder.io.outputs(0).resize(dataWidth = 32).pipelined(cmdHalfRate = true, rspValid = true)
-  val peripheralWishbone = peripheralArbiter.io.output.toWishbone()
+  val peripheralWishbone = peripheralArbiter.io.output.pipelined(cmdValid = true).toWishbone()
   io.peripheral << peripheralWishbone
 }
@@ -343,8 +350,8 @@ object VexRiscvLitexSmpClusterGen extends App {
     debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn"))
   )
-//  SpinalVerilog(Bench.compressIo(dutGen))
-  SpinalVerilog(dutGen)
+  SpinalVerilog(Bench.compressIo(dutGen))
+//  SpinalVerilog(dutGen)
 }

From 6323caf265a69d1f183a93a7166861bed8b16c04 Mon Sep 17 00:00:00 2001
From: Dolu1990
Date: Wed, 6 May 2020 17:09:46 +0200
Subject: [PATCH 56/91] MMU now allows $ to match tag against tlb physical values directly

D$ retiming
D$ directTlbHit feature added for better timings
---
 src/main/scala/vexriscv/Services.scala | 13 +++-
 src/main/scala/vexriscv/ip/DataCache.scala | 36 +++++++----
 .../scala/vexriscv/ip/InstructionCache.scala | 12 ++--
 .../vexriscv/plugin/DBusCachedPlugin.scala | 9 ++-
 .../vexriscv/plugin/DBusSimplePlugin.scala | 3 +-
 .../vexriscv/plugin/IBusCachedPlugin.scala | 2 +-
 .../plugin/MemoryTranslatorPlugin.scala | 4 +-
 .../scala/vexriscv/plugin/MmuPlugin.scala | 62 ++++++++++++-------
 .../plugin/StaticMemoryTranslatorPlugin.scala | 3 +-
 .../vexriscv/TestIndividualFeatures.scala | 6 +-
 10 files changed, 97 insertions(+), 53 deletions(-)

diff --git a/src/main/scala/vexriscv/Services.scala b/src/main/scala/vexriscv/Services.scala
index 4b0aeca..1c9a2ae 100644
--- a/src/main/scala/vexriscv/Services.scala
+++ b/src/main/scala/vexriscv/Services.scala
@@ -68,17 +68,24 @@ case class MemoryTranslatorCmd() extends Bundle{
   val virtualAddress = UInt(32 bits)
   val bypassTranslation = Bool
 }
-case class 
MemoryTranslatorRsp() extends Bundle{ +case class MemoryTranslatorRsp(wayCount : Int) extends Bundle{ val physicalAddress = UInt(32 bits) val isIoAccess = Bool val allowRead, allowWrite, allowExecute = Bool val exception = Bool val refilling = Bool + val bypassTranslation = Bool + val ways = Vec(MemoryTranslatorRspWay(), wayCount) +} +case class MemoryTranslatorRspWay() extends Bundle{ + val sel = Bool() + val physical = UInt(32 bits) } -case class MemoryTranslatorBus() extends Bundle with IMasterSlave{ + +case class MemoryTranslatorBus(wayCount : Int) extends Bundle with IMasterSlave{ val cmd = MemoryTranslatorCmd() - val rsp = MemoryTranslatorRsp() + val rsp = MemoryTranslatorRsp(wayCount) val end = Bool val busy = Bool diff --git a/src/main/scala/vexriscv/ip/DataCache.scala b/src/main/scala/vexriscv/ip/DataCache.scala index 9de6f09..657b8c0 100644 --- a/src/main/scala/vexriscv/ip/DataCache.scala +++ b/src/main/scala/vexriscv/ip/DataCache.scala @@ -28,6 +28,7 @@ case class DataCacheConfig(cacheSize : Int, withExclusive : Boolean = false, withInvalidate : Boolean = false, pendingMax : Int = 32, + directTlbHit : Boolean = false, mergeExecuteMemory : Boolean = false){ assert(!(mergeExecuteMemory && (earlyDataMux || earlyWaysHits))) assert(!(earlyDataMux && !earlyWaysHits)) @@ -124,13 +125,13 @@ case class DataCacheCpuExecuteArgs(p : DataCacheConfig) extends Bundle{ val totalyConsistent = Bool() //Only for AMO/LRSC } -case class DataCacheCpuMemory(p : DataCacheConfig) extends Bundle with IMasterSlave{ +case class DataCacheCpuMemory(p : DataCacheConfig, tlbWayCount : Int) extends Bundle with IMasterSlave{ val isValid = Bool val isStuck = Bool val isRemoved = Bool val isWrite = Bool val address = UInt(p.addressWidth bit) - val mmuBus = MemoryTranslatorBus() + val mmuBus = MemoryTranslatorBus(tlbWayCount) override def asMaster(): Unit = { out(isValid, isStuck, isRemoved, address) @@ -174,9 +175,9 @@ case class DataCacheCpuWriteBack(p : DataCacheConfig) extends Bundle with IMaste } } -case class DataCacheCpuBus(p : DataCacheConfig) extends Bundle with IMasterSlave{ +case class DataCacheCpuBus(p : DataCacheConfig, tlbWayCount : Int) extends Bundle with IMasterSlave{ val execute = DataCacheCpuExecute(p) - val memory = DataCacheCpuMemory(p) + val memory = DataCacheCpuMemory(p, tlbWayCount) val writeBack = DataCacheCpuWriteBack(p) val redo = Bool() @@ -422,11 +423,11 @@ object DataCacheExternalAmoStates extends SpinalEnum{ } //If external amo, mem rsp should stay -class DataCache(val p : DataCacheConfig) extends Component{ +class DataCache(val p : DataCacheConfig, tlbWayCount : Int) extends Component{ import p._ val io = new Bundle{ - val cpu = slave(DataCacheCpuBus(p)) + val cpu = slave(DataCacheCpuBus(p, tlbWayCount)) val mem = master(DataCacheMemBus(p)) } @@ -537,11 +538,12 @@ class DataCache(val p : DataCacheConfig) extends Component{ val memCmdSent = RegInit(False) setWhen (io.mem.cmd.ready) clearWhen (!io.cpu.writeBack.isStuck) val pending = withExclusive generate new Area{ val counter = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0) - counter := counter + U(io.mem.cmd.fire && io.mem.cmd.last) - U(io.mem.rsp.valid && io.mem.rsp.last) + val counterNext = counter + U(io.mem.cmd.fire && io.mem.cmd.last) - U(io.mem.rsp.valid && io.mem.rsp.last) + counter := counterNext - val done = counter === 0 - val full = RegNext(counter.msb) - val last = counter === 1 + val done = RegNext(counterNext === 0) + val full = RegNext(counter.msb) //Has margin + val last = RegNext(counterNext === 1) //Equivalent to 
counter === 1 but pipelined if(!withInvalidate) { io.cpu.execute.haltIt setWhen(full) @@ -643,7 +645,19 @@ class DataCache(val p : DataCacheConfig) extends Component{ } } - val wayHits = earlyWaysHits generate ways.map(way => (io.cpu.memory.mmuBus.rsp.physicalAddress(tagRange) === way.tagsReadRsp.address && way.tagsReadRsp.valid)) + val wayHits = earlyWaysHits generate Bits(wayCount bits) + val indirectTlbHitGen = (earlyWaysHits && !directTlbHit) generate new Area { + wayHits := B(ways.map(way => (io.cpu.memory.mmuBus.rsp.physicalAddress(tagRange) === way.tagsReadRsp.address && way.tagsReadRsp.valid))) + } + val directTlbHitGen = (earlyWaysHits && directTlbHit) generate new Area { + val wayTlbHits = for (way <- ways) yield for (tlb <- io.cpu.memory.mmuBus.rsp.ways) yield { + way.tagsReadRsp.address === tlb.physical(tagRange) && tlb.sel + } + val translatedHits = B(wayTlbHits.map(_.orR)) + val bypassHits = B(ways.map(_.tagsReadRsp.address === io.cpu.memory.address(tagRange))) + wayHits := (io.cpu.memory.mmuBus.rsp.bypassTranslation ? bypassHits | translatedHits) & B(ways.map(_.tagsReadRsp.valid)) + } + val dataMux = earlyDataMux generate MuxOH(wayHits, ways.map(_.dataReadRsp)) val wayInvalidate = stagePipe(stage0. wayInvalidate) val dataColisions = if(mergeExecuteMemory){ diff --git a/src/main/scala/vexriscv/ip/InstructionCache.scala b/src/main/scala/vexriscv/ip/InstructionCache.scala index 7560114..d684298 100644 --- a/src/main/scala/vexriscv/ip/InstructionCache.scala +++ b/src/main/scala/vexriscv/ip/InstructionCache.scala @@ -104,7 +104,7 @@ trait InstructionCacheCommons{ val cacheMiss, error, mmuRefilling, mmuException, isUser : Bool } -case class InstructionCacheCpuFetch(p : InstructionCacheConfig) extends Bundle with IMasterSlave with InstructionCacheCommons { +case class InstructionCacheCpuFetch(p : InstructionCacheConfig, tlbWayCount : Int) extends Bundle with IMasterSlave with InstructionCacheCommons { val isValid = Bool() val isStuck = Bool() val isRemoved = Bool() @@ -112,7 +112,7 @@ case class InstructionCacheCpuFetch(p : InstructionCacheConfig) extends Bundle w val data = Bits(p.cpuDataWidth bits) val dataBypassValid = p.bypassGen generate Bool() val dataBypass = p.bypassGen generate Bits(p.cpuDataWidth bits) - val mmuBus = MemoryTranslatorBus() + val mmuBus = MemoryTranslatorBus(tlbWayCount) val physicalAddress = UInt(p.addressWidth bits) val cacheMiss, error, mmuRefilling, mmuException, isUser = ifGen(!p.twoCycleCache)(Bool) val haltIt = Bool() //Used to wait on the MMU rsp busy @@ -141,9 +141,9 @@ case class InstructionCacheCpuDecode(p : InstructionCacheConfig) extends Bundle } } -case class InstructionCacheCpuBus(p : InstructionCacheConfig) extends Bundle with IMasterSlave{ +case class InstructionCacheCpuBus(p : InstructionCacheConfig, tlbWayCount : Int) extends Bundle with IMasterSlave{ val prefetch = InstructionCacheCpuPrefetch(p) - val fetch = InstructionCacheCpuFetch(p) + val fetch = InstructionCacheCpuFetch(p, tlbWayCount) val decode = InstructionCacheCpuDecode(p) val fill = Flow(UInt(p.addressWidth bits)) @@ -277,11 +277,11 @@ case class InstructionCacheFlushBus() extends Bundle with IMasterSlave{ } } -class InstructionCache(p : InstructionCacheConfig) extends Component{ +class InstructionCache(p : InstructionCacheConfig, tlbWayCount : Int) extends Component{ import p._ val io = new Bundle{ val flush = in Bool() - val cpu = slave(InstructionCacheCpuBus(p)) + val cpu = slave(InstructionCacheCpuBus(p, tlbWayCount)) val mem = master(InstructionCacheMemBus(p)) } diff --git 
a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala index 17c429c..3855c11 100644 --- a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala +++ b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala @@ -171,9 +171,12 @@ class DBusCachedPlugin(val config : DataCacheConfig, import pipeline.config._ - val cache = new DataCache(this.config.copy( - mergeExecuteMemory = writeBack == null - )) + val cache = new DataCache( + this.config.copy( + mergeExecuteMemory = writeBack == null + ), + tlbWayCount = mmuBus.rsp.wayCount + ) //Interconnect the plugin dBus with the cache dBus with some optional pipelining def optionPipe[T](cond : Boolean, on : T)(f : T => T) : T = if(cond) f(on) else on diff --git a/src/main/scala/vexriscv/plugin/DBusSimplePlugin.scala b/src/main/scala/vexriscv/plugin/DBusSimplePlugin.scala index 4130691..5b75052 100644 --- a/src/main/scala/vexriscv/plugin/DBusSimplePlugin.scala +++ b/src/main/scala/vexriscv/plugin/DBusSimplePlugin.scala @@ -298,7 +298,6 @@ class DBusSimplePlugin(catchAddressMisaligned : Boolean = false, object MEMORY_ADDRESS_LOW extends Stageable(UInt(2 bits)) object ALIGNEMENT_FAULT extends Stageable(Bool) object MMU_FAULT extends Stageable(Bool) - object MMU_RSP extends Stageable(MemoryTranslatorRsp()) object MEMORY_ATOMIC extends Stageable(Bool) object ATOMIC_HIT extends Stageable(Bool) object MEMORY_STORE extends Stageable(Bool) @@ -393,6 +392,8 @@ class DBusSimplePlugin(catchAddressMisaligned : Boolean = false, import pipeline._ import pipeline.config._ + object MMU_RSP extends Stageable(MemoryTranslatorRsp(mmuBus.rsp.wayCount)) + dBus = master(DBusSimpleBus()).setName("dBus") diff --git a/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala b/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala index 642fcbb..ede324c 100644 --- a/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala +++ b/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala @@ -124,7 +124,7 @@ class IBusCachedPlugin(resetVector : BigInt = 0x80000000l, import pipeline.config._ pipeline plug new FetchArea(pipeline) { - val cache = new InstructionCache(IBusCachedPlugin.this.config.copy(bypassGen = tightlyGen)) + val cache = new InstructionCache(IBusCachedPlugin.this.config.copy(bypassGen = tightlyGen), mmuBus.rsp.wayCount) iBus = master(new InstructionCacheMemBus(IBusCachedPlugin.this.config)).setName("iBus") iBus <> cache.io.mem iBus.cmd.address.allowOverride := cache.io.mem.cmd.address diff --git a/src/main/scala/vexriscv/plugin/MemoryTranslatorPlugin.scala b/src/main/scala/vexriscv/plugin/MemoryTranslatorPlugin.scala index 623d872..903d93c 100644 --- a/src/main/scala/vexriscv/plugin/MemoryTranslatorPlugin.scala +++ b/src/main/scala/vexriscv/plugin/MemoryTranslatorPlugin.scala @@ -22,8 +22,8 @@ class MemoryTranslatorPlugin(tlbSize : Int, val portsInfo = ArrayBuffer[MemoryTranslatorPort]() override def newTranslationPort(priority : Int,args : Any): MemoryTranslatorBus = { -// val exceptionBus = pipeline.service(classOf[ExceptionService]).newExceptionPort(stage) - val port = MemoryTranslatorPort(MemoryTranslatorBus(),priority,args.asInstanceOf[MemoryTranslatorPortConfig]/*,exceptionBus*/) + val config = args.asInstanceOf[MemoryTranslatorPortConfig] + val port = MemoryTranslatorPort(MemoryTranslatorBus(0),priority, config/*,exceptionBus*/) portsInfo += port port.bus } diff --git a/src/main/scala/vexriscv/plugin/MmuPlugin.scala b/src/main/scala/vexriscv/plugin/MmuPlugin.scala index 9dedde5..4c5083f 100644 --- 
a/src/main/scala/vexriscv/plugin/MmuPlugin.scala +++ b/src/main/scala/vexriscv/plugin/MmuPlugin.scala @@ -47,7 +47,8 @@ class MmuPlugin(ioRange : UInt => Bool, val portsInfo = ArrayBuffer[MmuPort]() override def newTranslationPort(priority : Int,args : Any): MemoryTranslatorBus = { - val port = MmuPort(MemoryTranslatorBus(),priority,args.asInstanceOf[MmuPortConfig], portsInfo.length) + val config = args.asInstanceOf[MmuPortConfig] + val port = MmuPort(MemoryTranslatorBus(config.portTlbSize),priority, config, portsInfo.length) portsInfo += port port.bus } @@ -71,7 +72,7 @@ class MmuPlugin(ioRange : UInt => Bool, val csrService = pipeline.service(classOf[CsrInterface]) //Sorted by priority - val sortedPortsInfo = portsInfo.sortWith((a,b) => a.priority > b.priority) + val sortedPortsInfo = portsInfo.sortBy(_.priority) case class CacheLine() extends Bundle { val valid, exception, superPage = Bool @@ -137,6 +138,12 @@ class MmuPlugin(ioRange : UInt => Bool, } port.bus.rsp.isIoAccess := ioRange(port.bus.rsp.physicalAddress) + port.bus.rsp.bypassTranslation := !requireMmuLockup + for(wayId <- 0 until port.args.portTlbSize){ + port.bus.rsp.ways(wayId).sel := cacheHits(wayId) + port.bus.rsp.ways(wayId).physical := cache(wayId).physicalAddress(1) @@ (cache(wayId).superPage ? port.bus.cmd.virtualAddress(21 downto 12) | cache(wayId).physicalAddress(0)) @@ port.bus.cmd.virtualAddress(11 downto 0) + } + // Avoid keeping any invalid line in the cache after an exception. // https://github.com/riscv/riscv-linux/blob/8fe28cb58bcb235034b64cbbb7550a8a43fd88be/arch/riscv/include/asm/pgtable.h#L276 when(service(classOf[IContextSwitching]).isContextSwitching) { @@ -154,21 +161,23 @@ class MmuPlugin(ioRange : UInt => Bool, } val state = RegInit(State.IDLE) val vpn = Reg(Vec(UInt(10 bits), UInt(10 bits))) - val portId = Reg(UInt(log2Up(portsInfo.length) bits)) + val portSortedOh = Reg(Bits(portsInfo.length bits)) case class PTE() extends Bundle { val V, R, W ,X, U, G, A, D = Bool() val RSW = Bits(2 bits) val PPN0 = UInt(10 bits) val PPN1 = UInt(12 bits) } + + val dBusRspStaged = dBusAccess.rsp.stage() val dBusRsp = new Area{ val pte = PTE() - pte.assignFromBits(dBusAccess.rsp.data) - val exception = !pte.V || (!pte.R && pte.W) || dBusAccess.rsp.error + pte.assignFromBits(dBusRspStaged.data) + val exception = !pte.V || (!pte.R && pte.W) || dBusRspStaged.error val leaf = pte.R || pte.X } - val pteBuffer = RegNextWhen(dBusRsp.pte, dBusAccess.rsp.valid && !dBusAccess.rsp.redo) + val pteBuffer = RegNextWhen(dBusRsp.pte, dBusRspStaged.valid && !dBusRspStaged.redo) dBusAccess.cmd.valid := False dBusAccess.cmd.write := False @@ -176,16 +185,25 @@ class MmuPlugin(ioRange : UInt => Bool, dBusAccess.cmd.address.assignDontCare() dBusAccess.cmd.data.assignDontCare() dBusAccess.cmd.writeMask.assignDontCare() + + val refills = OHMasking.last(B(sortedPortsInfo.map(port => port.bus.cmd.isValid && port.bus.rsp.refilling))) switch(state){ is(State.IDLE){ - for(port <- portsInfo.sortBy(_.priority)){ - when(port.bus.cmd.isValid && port.bus.rsp.refilling){ - vpn(1) := port.bus.cmd.virtualAddress(31 downto 22) - vpn(0) := port.bus.cmd.virtualAddress(21 downto 12) - portId := port.id - state := State.L1_CMD - } + when(refills.orR){ + portSortedOh := refills + state := State.L1_CMD + val address = MuxOH(refills, sortedPortsInfo.map(_.bus.cmd.virtualAddress)) + vpn(1) := address(31 downto 22) + vpn(0) := address(21 downto 12) } +// for(port <- portsInfo.sortBy(_.priority)){ +// when(port.bus.cmd.isValid && port.bus.rsp.refilling){ +// 
vpn(1) := port.bus.cmd.virtualAddress(31 downto 22) +// vpn(0) := port.bus.cmd.virtualAddress(21 downto 12) +// portId := port.id +// state := State.L1_CMD +// } +// } } is(State.L1_CMD){ dBusAccess.cmd.valid := True @@ -195,12 +213,12 @@ class MmuPlugin(ioRange : UInt => Bool, } } is(State.L1_RSP){ - when(dBusAccess.rsp.valid){ + when(dBusRspStaged.valid){ state := State.L0_CMD when(dBusRsp.leaf || dBusRsp.exception){ state := State.IDLE } - when(dBusAccess.rsp.redo){ + when(dBusRspStaged.redo){ state := State.L1_CMD } } @@ -213,22 +231,22 @@ class MmuPlugin(ioRange : UInt => Bool, } } is(State.L0_RSP){ - when(dBusAccess.rsp.valid) { + when(dBusRspStaged.valid) { state := State.IDLE - when(dBusAccess.rsp.redo){ + when(dBusRspStaged.redo){ state := State.L0_CMD } } } } - for(port <- ports) { - port.handle.bus.busy := state =/= State.IDLE && portId === port.id + for((port, id) <- sortedPortsInfo.zipWithIndex) { + port.bus.busy := state =/= State.IDLE && portSortedOh(id) } - when(dBusAccess.rsp.valid && !dBusAccess.rsp.redo && (dBusRsp.leaf || dBusRsp.exception)){ - for(port <- ports){ - when(portId === port.id) { + when(dBusRspStaged.valid && !dBusRspStaged.redo && (dBusRsp.leaf || dBusRsp.exception)){ + for((port, id) <- ports.zipWithIndex) { + when(portSortedOh(id)) { port.entryToReplace.increment() for ((line, lineId) <- port.cache.zipWithIndex) { when(port.entryToReplace === lineId){ diff --git a/src/main/scala/vexriscv/plugin/StaticMemoryTranslatorPlugin.scala b/src/main/scala/vexriscv/plugin/StaticMemoryTranslatorPlugin.scala index 351ebc5..6f626e7 100644 --- a/src/main/scala/vexriscv/plugin/StaticMemoryTranslatorPlugin.scala +++ b/src/main/scala/vexriscv/plugin/StaticMemoryTranslatorPlugin.scala @@ -11,8 +11,7 @@ class StaticMemoryTranslatorPlugin(ioRange : UInt => Bool) extends Plugin[VexRis val portsInfo = ArrayBuffer[StaticMemoryTranslatorPort]() override def newTranslationPort(priority : Int,args : Any): MemoryTranslatorBus = { -// val exceptionBus = pipeline.service(classOf[ExceptionService]).newExceptionPort(stage) - val port = StaticMemoryTranslatorPort(MemoryTranslatorBus(),priority) + val port = StaticMemoryTranslatorPort(MemoryTranslatorBus(0),priority) portsInfo += port port.bus } diff --git a/src/test/scala/vexriscv/TestIndividualFeatures.scala b/src/test/scala/vexriscv/TestIndividualFeatures.scala index eafd1d9..c31a30a 100644 --- a/src/test/scala/vexriscv/TestIndividualFeatures.scala +++ b/src/test/scala/vexriscv/TestIndividualFeatures.scala @@ -436,13 +436,14 @@ class DBusDimension extends VexRiscvDimension("DBus") { val dBusRspSlavePipe = r.nextBoolean() || withSmp val relaxedMemoryTranslationRegister = r.nextBoolean() val earlyWaysHits = r.nextBoolean() && !noWriteBack + val directTlbHit = r.nextBoolean() && mmuConfig.isInstanceOf[MmuPortConfig] val dBusCmdMasterPipe, dBusCmdSlavePipe = false //As it create test bench issues do{ cacheSize = 512 << r.nextInt(5) wayCount = 1 << r.nextInt(3) }while(cacheSize/wayCount < 512 || (catchAll && cacheSize/wayCount > 4096)) - new VexRiscvPosition(s"Cached${memDataWidth}d" + "S" + cacheSize + "W" + wayCount + "BPL" + bytePerLine + (if(dBusCmdMasterPipe) "Cmp " else "") + (if(dBusCmdSlavePipe) "Csp " else "") + (if(dBusRspSlavePipe) "Rsp " else "") + (if(relaxedMemoryTranslationRegister) "Rmtr " else "") + (if(earlyWaysHits) "Ewh " else "") + (if(withAmo) "Amo " else "") + (if(withSmp) "Smp " else "")) { + new VexRiscvPosition(s"Cached${memDataWidth}d" + "S" + cacheSize + "W" + wayCount + "BPL" + bytePerLine + 
(if(dBusCmdMasterPipe) "Cmp " else "") + (if(dBusCmdSlavePipe) "Csp " else "") + (if(dBusRspSlavePipe) "Rsp " else "") + (if(relaxedMemoryTranslationRegister) "Rmtr " else "") + (if(earlyWaysHits) "Ewh " else "") + (if(withAmo) "Amo " else "") + (if(withSmp) "Smp " else "") + (if(directTlbHit) "Dtlb " else "")) { override def testParam = s"DBUS=CACHED DBUS_DATA_WIDTH=$memDataWidth " + (if(withLrSc) "LRSC=yes " else "") + (if(withAmo) "AMO=yes " else "") + (if(withSmp) "DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes " else "") override def applyOn(config: VexRiscvConfig): Unit = { @@ -461,7 +462,8 @@ class DBusDimension extends VexRiscvDimension("DBus") { withAmo = withAmo, earlyWaysHits = earlyWaysHits, withExclusive = withSmp, - withInvalidate = withSmp + withInvalidate = withSmp, + directTlbHit = directTlbHit ), dBusCmdMasterPipe = dBusCmdMasterPipe, dBusCmdSlavePipe = dBusCmdSlavePipe, From fc0f3a2020a4cbc49ca51b50d942cdc924cef330 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Wed, 6 May 2020 18:05:20 +0200 Subject: [PATCH 57/91] cleanup mmu interface --- src/main/scala/vexriscv/Services.scala | 10 +++++----- src/main/scala/vexriscv/ip/DataCache.scala | 12 ++++++------ src/main/scala/vexriscv/ip/InstructionCache.scala | 12 ++++++------ .../scala/vexriscv/plugin/DBusCachedPlugin.scala | 2 +- .../scala/vexriscv/plugin/DBusSimplePlugin.scala | 2 +- .../scala/vexriscv/plugin/IBusCachedPlugin.scala | 2 +- .../vexriscv/plugin/MemoryTranslatorPlugin.scala | 2 +- src/main/scala/vexriscv/plugin/MmuPlugin.scala | 2 +- .../plugin/StaticMemoryTranslatorPlugin.scala | 2 +- 9 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/main/scala/vexriscv/Services.scala b/src/main/scala/vexriscv/Services.scala index 1c9a2ae..5bf763e 100644 --- a/src/main/scala/vexriscv/Services.scala +++ b/src/main/scala/vexriscv/Services.scala @@ -68,24 +68,24 @@ case class MemoryTranslatorCmd() extends Bundle{ val virtualAddress = UInt(32 bits) val bypassTranslation = Bool } -case class MemoryTranslatorRsp(wayCount : Int) extends Bundle{ +case class MemoryTranslatorRsp(p : MemoryTranslatorBusParameter) extends Bundle{ val physicalAddress = UInt(32 bits) val isIoAccess = Bool val allowRead, allowWrite, allowExecute = Bool val exception = Bool val refilling = Bool val bypassTranslation = Bool - val ways = Vec(MemoryTranslatorRspWay(), wayCount) + val ways = Vec(MemoryTranslatorRspWay(), p.wayCount) } case class MemoryTranslatorRspWay() extends Bundle{ val sel = Bool() val physical = UInt(32 bits) } - -case class MemoryTranslatorBus(wayCount : Int) extends Bundle with IMasterSlave{ +case class MemoryTranslatorBusParameter(wayCount : Int) +case class MemoryTranslatorBus(p : MemoryTranslatorBusParameter) extends Bundle with IMasterSlave{ val cmd = MemoryTranslatorCmd() - val rsp = MemoryTranslatorRsp(wayCount) + val rsp = MemoryTranslatorRsp(p) val end = Bool val busy = Bool diff --git a/src/main/scala/vexriscv/ip/DataCache.scala b/src/main/scala/vexriscv/ip/DataCache.scala index 657b8c0..a99a929 100644 --- a/src/main/scala/vexriscv/ip/DataCache.scala +++ b/src/main/scala/vexriscv/ip/DataCache.scala @@ -125,13 +125,13 @@ case class DataCacheCpuExecuteArgs(p : DataCacheConfig) extends Bundle{ val totalyConsistent = Bool() //Only for AMO/LRSC } -case class DataCacheCpuMemory(p : DataCacheConfig, tlbWayCount : Int) extends Bundle with IMasterSlave{ +case class DataCacheCpuMemory(p : DataCacheConfig, mmu : MemoryTranslatorBusParameter) extends Bundle with IMasterSlave{ val isValid = Bool val isStuck = Bool val isRemoved = Bool 
val isWrite = Bool val address = UInt(p.addressWidth bit) - val mmuBus = MemoryTranslatorBus(tlbWayCount) + val mmuBus = MemoryTranslatorBus(mmu) override def asMaster(): Unit = { out(isValid, isStuck, isRemoved, address) @@ -175,9 +175,9 @@ case class DataCacheCpuWriteBack(p : DataCacheConfig) extends Bundle with IMaste } } -case class DataCacheCpuBus(p : DataCacheConfig, tlbWayCount : Int) extends Bundle with IMasterSlave{ +case class DataCacheCpuBus(p : DataCacheConfig, mmu : MemoryTranslatorBusParameter) extends Bundle with IMasterSlave{ val execute = DataCacheCpuExecute(p) - val memory = DataCacheCpuMemory(p, tlbWayCount) + val memory = DataCacheCpuMemory(p, mmu) val writeBack = DataCacheCpuWriteBack(p) val redo = Bool() @@ -423,11 +423,11 @@ object DataCacheExternalAmoStates extends SpinalEnum{ } //If external amo, mem rsp should stay -class DataCache(val p : DataCacheConfig, tlbWayCount : Int) extends Component{ +class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParameter) extends Component{ import p._ val io = new Bundle{ - val cpu = slave(DataCacheCpuBus(p, tlbWayCount)) + val cpu = slave(DataCacheCpuBus(p, mmuParameter)) val mem = master(DataCacheMemBus(p)) } diff --git a/src/main/scala/vexriscv/ip/InstructionCache.scala b/src/main/scala/vexriscv/ip/InstructionCache.scala index d684298..aedd6af 100644 --- a/src/main/scala/vexriscv/ip/InstructionCache.scala +++ b/src/main/scala/vexriscv/ip/InstructionCache.scala @@ -104,7 +104,7 @@ trait InstructionCacheCommons{ val cacheMiss, error, mmuRefilling, mmuException, isUser : Bool } -case class InstructionCacheCpuFetch(p : InstructionCacheConfig, tlbWayCount : Int) extends Bundle with IMasterSlave with InstructionCacheCommons { +case class InstructionCacheCpuFetch(p : InstructionCacheConfig, mmuParameter : MemoryTranslatorBusParameter) extends Bundle with IMasterSlave with InstructionCacheCommons { val isValid = Bool() val isStuck = Bool() val isRemoved = Bool() @@ -112,7 +112,7 @@ case class InstructionCacheCpuFetch(p : InstructionCacheConfig, tlbWayCount : In val data = Bits(p.cpuDataWidth bits) val dataBypassValid = p.bypassGen generate Bool() val dataBypass = p.bypassGen generate Bits(p.cpuDataWidth bits) - val mmuBus = MemoryTranslatorBus(tlbWayCount) + val mmuBus = MemoryTranslatorBus(mmuParameter) val physicalAddress = UInt(p.addressWidth bits) val cacheMiss, error, mmuRefilling, mmuException, isUser = ifGen(!p.twoCycleCache)(Bool) val haltIt = Bool() //Used to wait on the MMU rsp busy @@ -141,9 +141,9 @@ case class InstructionCacheCpuDecode(p : InstructionCacheConfig) extends Bundle } } -case class InstructionCacheCpuBus(p : InstructionCacheConfig, tlbWayCount : Int) extends Bundle with IMasterSlave{ +case class InstructionCacheCpuBus(p : InstructionCacheConfig, mmuParameter : MemoryTranslatorBusParameter) extends Bundle with IMasterSlave{ val prefetch = InstructionCacheCpuPrefetch(p) - val fetch = InstructionCacheCpuFetch(p, tlbWayCount) + val fetch = InstructionCacheCpuFetch(p, mmuParameter) val decode = InstructionCacheCpuDecode(p) val fill = Flow(UInt(p.addressWidth bits)) @@ -277,11 +277,11 @@ case class InstructionCacheFlushBus() extends Bundle with IMasterSlave{ } } -class InstructionCache(p : InstructionCacheConfig, tlbWayCount : Int) extends Component{ +class InstructionCache(p : InstructionCacheConfig, mmuParameter : MemoryTranslatorBusParameter) extends Component{ import p._ val io = new Bundle{ val flush = in Bool() - val cpu = slave(InstructionCacheCpuBus(p, tlbWayCount)) + val cpu = 
slave(InstructionCacheCpuBus(p, mmuParameter)) val mem = master(InstructionCacheMemBus(p)) } diff --git a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala index 3855c11..96e1b1b 100644 --- a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala +++ b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala @@ -175,7 +175,7 @@ class DBusCachedPlugin(val config : DataCacheConfig, this.config.copy( mergeExecuteMemory = writeBack == null ), - tlbWayCount = mmuBus.rsp.wayCount + mmuParameter = mmuBus.p ) //Interconnect the plugin dBus with the cache dBus with some optional pipelining diff --git a/src/main/scala/vexriscv/plugin/DBusSimplePlugin.scala b/src/main/scala/vexriscv/plugin/DBusSimplePlugin.scala index 5b75052..819fc8f 100644 --- a/src/main/scala/vexriscv/plugin/DBusSimplePlugin.scala +++ b/src/main/scala/vexriscv/plugin/DBusSimplePlugin.scala @@ -392,7 +392,7 @@ class DBusSimplePlugin(catchAddressMisaligned : Boolean = false, import pipeline._ import pipeline.config._ - object MMU_RSP extends Stageable(MemoryTranslatorRsp(mmuBus.rsp.wayCount)) + object MMU_RSP extends Stageable(MemoryTranslatorRsp(mmuBus.p)) dBus = master(DBusSimpleBus()).setName("dBus") diff --git a/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala b/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala index ede324c..be08cda 100644 --- a/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala +++ b/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala @@ -124,7 +124,7 @@ class IBusCachedPlugin(resetVector : BigInt = 0x80000000l, import pipeline.config._ pipeline plug new FetchArea(pipeline) { - val cache = new InstructionCache(IBusCachedPlugin.this.config.copy(bypassGen = tightlyGen), mmuBus.rsp.wayCount) + val cache = new InstructionCache(IBusCachedPlugin.this.config.copy(bypassGen = tightlyGen), mmuBus.p) iBus = master(new InstructionCacheMemBus(IBusCachedPlugin.this.config)).setName("iBus") iBus <> cache.io.mem iBus.cmd.address.allowOverride := cache.io.mem.cmd.address diff --git a/src/main/scala/vexriscv/plugin/MemoryTranslatorPlugin.scala b/src/main/scala/vexriscv/plugin/MemoryTranslatorPlugin.scala index 903d93c..c8c00ec 100644 --- a/src/main/scala/vexriscv/plugin/MemoryTranslatorPlugin.scala +++ b/src/main/scala/vexriscv/plugin/MemoryTranslatorPlugin.scala @@ -23,7 +23,7 @@ class MemoryTranslatorPlugin(tlbSize : Int, override def newTranslationPort(priority : Int,args : Any): MemoryTranslatorBus = { val config = args.asInstanceOf[MemoryTranslatorPortConfig] - val port = MemoryTranslatorPort(MemoryTranslatorBus(0),priority, config/*,exceptionBus*/) + val port = MemoryTranslatorPort(MemoryTranslatorBus(MemoryTranslatorBusParameter(wayCount = 0)),priority, config/*,exceptionBus*/) portsInfo += port port.bus } diff --git a/src/main/scala/vexriscv/plugin/MmuPlugin.scala b/src/main/scala/vexriscv/plugin/MmuPlugin.scala index 4c5083f..884a2d5 100644 --- a/src/main/scala/vexriscv/plugin/MmuPlugin.scala +++ b/src/main/scala/vexriscv/plugin/MmuPlugin.scala @@ -48,7 +48,7 @@ class MmuPlugin(ioRange : UInt => Bool, override def newTranslationPort(priority : Int,args : Any): MemoryTranslatorBus = { val config = args.asInstanceOf[MmuPortConfig] - val port = MmuPort(MemoryTranslatorBus(config.portTlbSize),priority, config, portsInfo.length) + val port = MmuPort(MemoryTranslatorBus(MemoryTranslatorBusParameter(wayCount = config.portTlbSize)),priority, config, portsInfo.length) portsInfo += port port.bus } diff --git 
a/src/main/scala/vexriscv/plugin/StaticMemoryTranslatorPlugin.scala b/src/main/scala/vexriscv/plugin/StaticMemoryTranslatorPlugin.scala index 6f626e7..cbe55f9 100644 --- a/src/main/scala/vexriscv/plugin/StaticMemoryTranslatorPlugin.scala +++ b/src/main/scala/vexriscv/plugin/StaticMemoryTranslatorPlugin.scala @@ -11,7 +11,7 @@ class StaticMemoryTranslatorPlugin(ioRange : UInt => Bool) extends Plugin[VexRis val portsInfo = ArrayBuffer[StaticMemoryTranslatorPort]() override def newTranslationPort(priority : Int,args : Any): MemoryTranslatorBus = { - val port = StaticMemoryTranslatorPort(MemoryTranslatorBus(0),priority) + val port = StaticMemoryTranslatorPort(MemoryTranslatorBus(MemoryTranslatorBusParameter(wayCount = 0)),priority) portsInfo += port port.bus } From 8e025aeeaa85c5919ab84f1c75bba4112a7a2296 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Thu, 7 May 2020 13:18:11 +0200 Subject: [PATCH 58/91] more litex smp cluster pipelining --- .../scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala index bb02e60..9bfca0b 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala @@ -305,7 +305,7 @@ case class VexRiscvLitexSmpCluster(p : VexRiscvLitexSmpClusterParameter, capabilities = Seq(iBusArbiterParameter, iBusArbiterParameter), pendingMax = 15 ) - iBusDecoder.io.input << iBusArbiter.io.output + iBusDecoder.io.input << iBusArbiter.io.output.pipelined(cmdValid = true) val iMem = LiteDramNative(p.liteDram) val iMemBridge = iMem.fromBmb(iBusDecoder.io.outputs(1), wdataFifoSize = 0, rdataFifoSize = 32) @@ -350,8 +350,8 @@ object VexRiscvLitexSmpClusterGen extends App { debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")) ) - SpinalVerilog(Bench.compressIo(dutGen)) -// SpinalVerilog(dutGen) +// SpinalVerilog(Bench.compressIo(dutGen)) + SpinalVerilog(dutGen) } From 41ee8fd2265e533784b33c50ef1572ec776cc9ed Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Thu, 7 May 2020 13:37:53 +0200 Subject: [PATCH 59/91] MmuPlugin now support multiple stages, D$ can now take advantage of that --- src/main/scala/vexriscv/Services.scala | 5 +- src/main/scala/vexriscv/TestsWorkspace.scala | 6 ++- .../demo/smp/VexRiscvSmpCluster.scala | 5 +- src/main/scala/vexriscv/ip/DataCache.scala | 23 ++++---- .../scala/vexriscv/ip/InstructionCache.scala | 6 +-- .../vexriscv/plugin/DBusCachedPlugin.scala | 24 +++++++-- .../vexriscv/plugin/DBusSimplePlugin.scala | 7 +-- .../vexriscv/plugin/IBusCachedPlugin.scala | 4 +- .../vexriscv/plugin/IBusSimplePlugin.scala | 6 +-- .../plugin/MemoryTranslatorPlugin.scala | 12 ++--- .../scala/vexriscv/plugin/MmuPlugin.scala | 52 +++++++++++++------ .../plugin/StaticMemoryTranslatorPlugin.scala | 2 +- .../vexriscv/TestIndividualFeatures.scala | 7 +-- 13 files changed, 97 insertions(+), 62 deletions(-) diff --git a/src/main/scala/vexriscv/Services.scala b/src/main/scala/vexriscv/Services.scala index 5bf763e..51dbe6b 100644 --- a/src/main/scala/vexriscv/Services.scala +++ b/src/main/scala/vexriscv/Services.scala @@ -65,6 +65,7 @@ trait RegFileService{ case class MemoryTranslatorCmd() extends Bundle{ val isValid = Bool + val isStuck = Bool val virtualAddress = UInt(32 bits) val bypassTranslation = Bool } @@ -82,9 +83,9 @@ case class MemoryTranslatorRspWay() extends Bundle{ val physical = 
UInt(32 bits) } -case class MemoryTranslatorBusParameter(wayCount : Int) +case class MemoryTranslatorBusParameter(wayCount : Int = 0, latency : Int = 0) case class MemoryTranslatorBus(p : MemoryTranslatorBusParameter) extends Bundle with IMasterSlave{ - val cmd = MemoryTranslatorCmd() + val cmd = Vec(MemoryTranslatorCmd(), p.latency + 1) val rsp = MemoryTranslatorRsp(p) val end = Bool val busy = Bool diff --git a/src/main/scala/vexriscv/TestsWorkspace.scala b/src/main/scala/vexriscv/TestsWorkspace.scala index 40f8b33..b97936d 100644 --- a/src/main/scala/vexriscv/TestsWorkspace.scala +++ b/src/main/scala/vexriscv/TestsWorkspace.scala @@ -73,7 +73,8 @@ object TestsWorkspace { // ) ), memoryTranslatorPortConfig = MmuPortConfig( - portTlbSize = 4 + portTlbSize = 4, + latency = 0 ) ), // ).newTightlyCoupledPort(TightlyCoupledPortParameter("iBusTc", a => a(30 downto 28) === 0x0 && a(5))), @@ -108,7 +109,8 @@ object TestsWorkspace { // ) ), memoryTranslatorPortConfig = MmuPortConfig( - portTlbSize = 4 + portTlbSize = 4, + latency = 1 ) ), diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala index 2f694cc..3f23a23 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -193,7 +193,10 @@ object VexRiscvSmpClusterGen { // ) ), memoryTranslatorPortConfig = MmuPortConfig( - portTlbSize = 4 + portTlbSize = 4, + latency = 1, + earlyRequireMmuLockup = true, + earlyCacheHits = true ) ), diff --git a/src/main/scala/vexriscv/ip/DataCache.scala b/src/main/scala/vexriscv/ip/DataCache.scala index a99a929..2f2e8c2 100644 --- a/src/main/scala/vexriscv/ip/DataCache.scala +++ b/src/main/scala/vexriscv/ip/DataCache.scala @@ -128,15 +128,14 @@ case class DataCacheCpuExecuteArgs(p : DataCacheConfig) extends Bundle{ case class DataCacheCpuMemory(p : DataCacheConfig, mmu : MemoryTranslatorBusParameter) extends Bundle with IMasterSlave{ val isValid = Bool val isStuck = Bool - val isRemoved = Bool val isWrite = Bool val address = UInt(p.addressWidth bit) - val mmuBus = MemoryTranslatorBus(mmu) + val mmuRsp = MemoryTranslatorRsp(mmu) override def asMaster(): Unit = { - out(isValid, isStuck, isRemoved, address) + out(isValid, isStuck, address) in(isWrite) - slave(mmuBus) + out(mmuRsp) } } @@ -619,10 +618,6 @@ class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParam def stagePipe[T <: Data](that : T) = if(mergeExecuteMemory) CombInit(that) else RegNextWhen(that, !io.cpu.memory.isStuck) val request = stagePipe(io.cpu.execute.args) val mask = stagePipe(stage0.mask) - io.cpu.memory.mmuBus.cmd.isValid := io.cpu.memory.isValid - io.cpu.memory.mmuBus.cmd.virtualAddress := io.cpu.memory.address - io.cpu.memory.mmuBus.cmd.bypassTranslation := False - io.cpu.memory.mmuBus.end := !io.cpu.memory.isStuck || io.cpu.memory.isRemoved io.cpu.memory.isWrite := request.wr val isAmo = if(withAmo) request.isAmo else False @@ -634,8 +629,8 @@ class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParam val o = CombInit(sync.w2o.busy) val i = CombInit(sync.w2i.busy) - val s = io.cpu.memory.mmuBus.rsp.isIoAccess ? o | w - val l = io.cpu.memory.mmuBus.rsp.isIoAccess ? i | r + val s = io.cpu.memory.mmuRsp.isIoAccess ? o | w + val l = io.cpu.memory.mmuRsp.isIoAccess ? i | r when(isAmo? (s || l) | (request.wr ? 
s | l)){ hazard := True @@ -647,15 +642,15 @@ class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParam val wayHits = earlyWaysHits generate Bits(wayCount bits) val indirectTlbHitGen = (earlyWaysHits && !directTlbHit) generate new Area { - wayHits := B(ways.map(way => (io.cpu.memory.mmuBus.rsp.physicalAddress(tagRange) === way.tagsReadRsp.address && way.tagsReadRsp.valid))) + wayHits := B(ways.map(way => (io.cpu.memory.mmuRsp.physicalAddress(tagRange) === way.tagsReadRsp.address && way.tagsReadRsp.valid))) } val directTlbHitGen = (earlyWaysHits && directTlbHit) generate new Area { - val wayTlbHits = for (way <- ways) yield for (tlb <- io.cpu.memory.mmuBus.rsp.ways) yield { + val wayTlbHits = for (way <- ways) yield for (tlb <- io.cpu.memory.mmuRsp.ways) yield { way.tagsReadRsp.address === tlb.physical(tagRange) && tlb.sel } val translatedHits = B(wayTlbHits.map(_.orR)) val bypassHits = B(ways.map(_.tagsReadRsp.address === io.cpu.memory.address(tagRange))) - wayHits := (io.cpu.memory.mmuBus.rsp.bypassTranslation ? bypassHits | translatedHits) & B(ways.map(_.tagsReadRsp.valid)) + wayHits := (io.cpu.memory.mmuRsp.bypassTranslation ? bypassHits | translatedHits) & B(ways.map(_.tagsReadRsp.valid)) } val dataMux = earlyDataMux generate MuxOH(wayHits, ways.map(_.dataReadRsp)) @@ -673,7 +668,7 @@ class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParam def ramPipe[T <: Data](that : T) = if(mergeExecuteMemory) CombInit(that) else RegNextWhen(that, !io.cpu.writeBack.isStuck) val request = RegNextWhen(stageA.request, !io.cpu.writeBack.isStuck) val mmuRspFreeze = False - val mmuRsp = RegNextWhen(io.cpu.memory.mmuBus.rsp, !io.cpu.writeBack.isStuck && !mmuRspFreeze) + val mmuRsp = RegNextWhen(io.cpu.memory.mmuRsp, !io.cpu.writeBack.isStuck && !mmuRspFreeze) val tagsReadRsp = ways.map(w => ramPipe(w.tagsReadRsp)) val dataReadRsp = !earlyDataMux generate ways.map(w => ramPipe(w.dataReadRsp)) val wayInvalidate = stagePipe(stageA. wayInvalidate) diff --git a/src/main/scala/vexriscv/ip/InstructionCache.scala b/src/main/scala/vexriscv/ip/InstructionCache.scala index aedd6af..6053c0f 100644 --- a/src/main/scala/vexriscv/ip/InstructionCache.scala +++ b/src/main/scala/vexriscv/ip/InstructionCache.scala @@ -429,9 +429,9 @@ class InstructionCache(p : InstructionCacheConfig, mmuParameter : MemoryTranslat io.cpu.fetch.data := (if(p.bypassGen) (io.cpu.fetch.dataBypassValid ? 
io.cpu.fetch.dataBypass | cacheData) else cacheData) } - io.cpu.fetch.mmuBus.cmd.isValid := io.cpu.fetch.isValid - io.cpu.fetch.mmuBus.cmd.virtualAddress := io.cpu.fetch.pc - io.cpu.fetch.mmuBus.cmd.bypassTranslation := False + io.cpu.fetch.mmuBus.cmd.last.isValid := io.cpu.fetch.isValid + io.cpu.fetch.mmuBus.cmd.last.virtualAddress := io.cpu.fetch.pc + io.cpu.fetch.mmuBus.cmd.last.bypassTranslation := False io.cpu.fetch.mmuBus.end := !io.cpu.fetch.isStuck || io.cpu.fetch.isRemoved io.cpu.fetch.physicalAddress := io.cpu.fetch.mmuBus.rsp.physicalAddress diff --git a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala index 96e1b1b..f133616 100644 --- a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala +++ b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala @@ -170,6 +170,10 @@ class DBusCachedPlugin(val config : DataCacheConfig, import pipeline._ import pipeline.config._ + val twoStageMmu = mmuBus.p.latency match { + case 0 => false + case 1 => true + } val cache = new DataCache( this.config.copy( @@ -242,6 +246,12 @@ class DBusCachedPlugin(val config : DataCacheConfig, ) cache.io.cpu.execute.args.size := size + if(twoStageMmu) { + mmuBus.cmd(0).isValid := cache.io.cpu.execute.isValid + mmuBus.cmd(0).isStuck := arbitration.isStuck + mmuBus.cmd(0).virtualAddress := cache.io.cpu.execute.address + mmuBus.cmd(0).bypassTranslation := False + } cache.io.cpu.flush.valid := arbitration.isValid && input(MEMORY_MANAGMENT) cache.io.cpu.execute.args.totalyConsistent := input(MEMORY_FORCE_CONSTISTENCY) @@ -281,11 +291,15 @@ class DBusCachedPlugin(val config : DataCacheConfig, cache.io.cpu.memory.isValid := arbitration.isValid && input(MEMORY_ENABLE) cache.io.cpu.memory.isStuck := arbitration.isStuck - cache.io.cpu.memory.isRemoved := arbitration.removeIt cache.io.cpu.memory.address := (if(relaxedMemoryTranslationRegister) input(MEMORY_VIRTUAL_ADDRESS) else if(mmuAndBufferStage == execute) cache.io.cpu.execute.address else U(input(REGFILE_WRITE_DATA))) - cache.io.cpu.memory.mmuBus <> mmuBus - cache.io.cpu.memory.mmuBus.rsp.isIoAccess setWhen(pipeline(DEBUG_BYPASS_CACHE) && !cache.io.cpu.memory.isWrite) + mmuBus.cmd.last.isValid := cache.io.cpu.memory.isValid + mmuBus.cmd.last.isStuck := cache.io.cpu.memory.isStuck + mmuBus.cmd.last.virtualAddress := cache.io.cpu.memory.address + mmuBus.cmd.last.bypassTranslation := False + mmuBus.end := !arbitration.isStuck || arbitration.removeIt + cache.io.cpu.memory.mmuRsp := mmuBus.rsp + cache.io.cpu.memory.mmuRsp.isIoAccess setWhen(pipeline(DEBUG_BYPASS_CACHE) && !cache.io.cpu.memory.isWrite) } val managementStage = stages.last @@ -397,9 +411,9 @@ class DBusCachedPlugin(val config : DataCacheConfig, } } execute.insert(IS_DBUS_SHARING) := dBusAccess.cmd.fire + mmuBus.cmd.last.bypassTranslation setWhen(mmuAndBufferStage.input(IS_DBUS_SHARING)) + if(twoStageMmu) mmuBus.cmd(0).bypassTranslation setWhen(execute.input(IS_DBUS_SHARING)) - - mmuBus.cmd.bypassTranslation setWhen(mmuAndBufferStage.input(IS_DBUS_SHARING)) if(mmuAndBufferStage != execute) (cache.io.cpu.memory.isValid setWhen(mmuAndBufferStage.input(IS_DBUS_SHARING))) cache.io.cpu.writeBack.isValid setWhen(managementStage.input(IS_DBUS_SHARING)) dBusAccess.rsp.valid := managementStage.input(IS_DBUS_SHARING) && !cache.io.cpu.writeBack.isWrite && (cache.io.cpu.redo || !cache.io.cpu.writeBack.haltIt) diff --git a/src/main/scala/vexriscv/plugin/DBusSimplePlugin.scala b/src/main/scala/vexriscv/plugin/DBusSimplePlugin.scala index 819fc8f..ba896fa 100644 
--- a/src/main/scala/vexriscv/plugin/DBusSimplePlugin.scala +++ b/src/main/scala/vexriscv/plugin/DBusSimplePlugin.scala @@ -449,9 +449,10 @@ class DBusSimplePlugin(catchAddressMisaligned : Boolean = false, insert(FORMAL_MEM_WDATA) := dBus.cmd.payload.data val mmu = (mmuBus != null) generate new Area { - mmuBus.cmd.isValid := arbitration.isValid && input(MEMORY_ENABLE) - mmuBus.cmd.virtualAddress := input(SRC_ADD).asUInt - mmuBus.cmd.bypassTranslation := False + mmuBus.cmd.last.isValid := arbitration.isValid && input(MEMORY_ENABLE) + mmuBus.cmd.last.isStuck := arbitration.isStuck + mmuBus.cmd.last.virtualAddress := input(SRC_ADD).asUInt + mmuBus.cmd.last.bypassTranslation := False mmuBus.end := !arbitration.isStuck || arbitration.isRemoved dBus.cmd.address := mmuBus.rsp.physicalAddress diff --git a/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala b/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala index be08cda..4d41790 100644 --- a/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala +++ b/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala @@ -124,7 +124,7 @@ class IBusCachedPlugin(resetVector : BigInt = 0x80000000l, import pipeline.config._ pipeline plug new FetchArea(pipeline) { - val cache = new InstructionCache(IBusCachedPlugin.this.config.copy(bypassGen = tightlyGen), mmuBus.p) + val cache = new InstructionCache(IBusCachedPlugin.this.config.copy(bypassGen = tightlyGen), if(mmuBus != null) mmuBus.p else MemoryTranslatorBusParameter(0,0)) iBus = master(new InstructionCacheMemBus(IBusCachedPlugin.this.config)).setName("iBus") iBus <> cache.io.mem iBus.cmd.address.allowOverride := cache.io.mem.cmd.address @@ -251,7 +251,7 @@ class IBusCachedPlugin(resetVector : BigInt = 0x80000000l, if (mmuBus != null) { cache.io.cpu.fetch.mmuBus <> mmuBus } else { - cache.io.cpu.fetch.mmuBus.rsp.physicalAddress := cache.io.cpu.fetch.mmuBus.cmd.virtualAddress + cache.io.cpu.fetch.mmuBus.rsp.physicalAddress := cache.io.cpu.fetch.mmuBus.cmd.last.virtualAddress cache.io.cpu.fetch.mmuBus.rsp.allowExecute := True cache.io.cpu.fetch.mmuBus.rsp.allowRead := True cache.io.cpu.fetch.mmuBus.rsp.allowWrite := True diff --git a/src/main/scala/vexriscv/plugin/IBusSimplePlugin.scala b/src/main/scala/vexriscv/plugin/IBusSimplePlugin.scala index b8bc978..19145f5 100644 --- a/src/main/scala/vexriscv/plugin/IBusSimplePlugin.scala +++ b/src/main/scala/vexriscv/plugin/IBusSimplePlugin.scala @@ -318,9 +318,9 @@ class IBusSimplePlugin( resetVector : BigInt, } val mmu = (mmuBus != null) generate new Area { - mmuBus.cmd.isValid := cmdForkStage.input.valid - mmuBus.cmd.virtualAddress := cmdForkStage.input.payload - mmuBus.cmd.bypassTranslation := False + mmuBus.cmd.last.isValid := cmdForkStage.input.valid + mmuBus.cmd.last.virtualAddress := cmdForkStage.input.payload + mmuBus.cmd.last.bypassTranslation := False mmuBus.end := cmdForkStage.output.fire || externalFlush cmd.pc := mmuBus.rsp.physicalAddress(31 downto 2) @@ U"00" diff --git a/src/main/scala/vexriscv/plugin/MemoryTranslatorPlugin.scala b/src/main/scala/vexriscv/plugin/MemoryTranslatorPlugin.scala index c8c00ec..081b11d 100644 --- a/src/main/scala/vexriscv/plugin/MemoryTranslatorPlugin.scala +++ b/src/main/scala/vexriscv/plugin/MemoryTranslatorPlugin.scala @@ -70,17 +70,17 @@ class MemoryTranslatorPlugin(tlbSize : Int, val ports = for ((port, portId) <- sortedPortsInfo.zipWithIndex) yield new Area { val cache = Vec(Reg(CacheLine()) init, port.args.portTlbSize) - val cacheHits = cache.map(line => line.valid && line.virtualAddress === 
port.bus.cmd.virtualAddress(31 downto 12)) + val cacheHits = cache.map(line => line.valid && line.virtualAddress === port.bus.cmd.last.virtualAddress(31 downto 12)) val cacheHit = cacheHits.asBits.orR val cacheLine = MuxOH(cacheHits, cache) - val isInMmuRange = virtualRange(port.bus.cmd.virtualAddress) && !port.bus.cmd.bypassTranslation + val isInMmuRange = virtualRange(port.bus.cmd.last.virtualAddress) && !port.bus.cmd.last.bypassTranslation val sharedMiss = RegInit(False) val sharedIterator = Reg(UInt(log2Up(tlbSize + 1) bits)) val sharedAccessed = RegInit(B"00") val entryToReplace = Counter(port.args.portTlbSize) - val sharedAccessAsked = RegNext(port.bus.cmd.isValid && !cacheHit && sharedIterator < tlbSize && isInMmuRange) + val sharedAccessAsked = RegNext(port.bus.cmd.last.isValid && !cacheHit && sharedIterator < tlbSize && isInMmuRange) val sharedAccessGranted = sharedAccessAsked && shared.free when(sharedAccessGranted) { shared.readAddr := sharedIterator.resized @@ -92,7 +92,7 @@ class MemoryTranslatorPlugin(tlbSize : Int, } when(sharedAccessed.msb){ - when(shared.readData.virtualAddress === port.bus.cmd.virtualAddress(31 downto 12)){ + when(shared.readData.virtualAddress === port.bus.cmd.last.virtualAddress(31 downto 12)){ cache(entryToReplace) := shared.readData entryToReplace.increment() } @@ -108,7 +108,7 @@ class MemoryTranslatorPlugin(tlbSize : Int, when(isInMmuRange) { - port.bus.rsp.physicalAddress := cacheLine.physicalAddress @@ port.bus.cmd.virtualAddress(11 downto 0) + port.bus.rsp.physicalAddress := cacheLine.physicalAddress @@ port.bus.cmd.last.virtualAddress(11 downto 0) port.bus.rsp.allowRead := cacheLine.allowRead port.bus.rsp.allowWrite := cacheLine.allowWrite port.bus.rsp.allowExecute := cacheLine.allowExecute @@ -116,7 +116,7 @@ class MemoryTranslatorPlugin(tlbSize : Int, // port.bus.rsp.hit := cacheHit // port.stage.arbitration.haltItself setWhen (port.bus.cmd.isValid && !cacheHit && !sharedMiss) } otherwise { - port.bus.rsp.physicalAddress := port.bus.cmd.virtualAddress + port.bus.rsp.physicalAddress := port.bus.cmd.last.virtualAddress port.bus.rsp.allowRead := True port.bus.rsp.allowWrite := True port.bus.rsp.allowExecute := True diff --git a/src/main/scala/vexriscv/plugin/MmuPlugin.scala b/src/main/scala/vexriscv/plugin/MmuPlugin.scala index 884a2d5..e797bcf 100644 --- a/src/main/scala/vexriscv/plugin/MmuPlugin.scala +++ b/src/main/scala/vexriscv/plugin/MmuPlugin.scala @@ -34,9 +34,9 @@ object MmuPort{ val PRIORITY_DATA = 1 val PRIORITY_INSTRUCTION = 0 } -case class MmuPort(bus : MemoryTranslatorBus, priority : Int, args : MmuPortConfig, id : Int/*, exceptionBus: Flow[ExceptionCause]*/) +case class MmuPort(bus : MemoryTranslatorBus, priority : Int, args : MmuPortConfig, id : Int) -case class MmuPortConfig(portTlbSize : Int) +case class MmuPortConfig(portTlbSize : Int, latency : Int = 0, earlyRequireMmuLockup : Boolean = false, earlyCacheHits : Boolean = false) class MmuPlugin(ioRange : UInt => Bool, virtualRange : UInt => Bool = address => True, @@ -48,7 +48,7 @@ class MmuPlugin(ioRange : UInt => Bool, override def newTranslationPort(priority : Int,args : Any): MemoryTranslatorBus = { val config = args.asInstanceOf[MmuPortConfig] - val port = MmuPort(MemoryTranslatorBus(MemoryTranslatorBusParameter(wayCount = config.portTlbSize)),priority, config, portsInfo.length) + val port = MmuPort(MemoryTranslatorBus(MemoryTranslatorBusParameter(wayCount = config.portTlbSize, latency = config.latency)),priority, config, portsInfo.length) portsInfo += port port.bus } @@ 
-103,33 +103,51 @@ class MmuPlugin(ioRange : UInt => Bool, val ports = for (port <- sortedPortsInfo) yield new Area { val handle = port val id = port.id - val cache = Vec(Reg(CacheLine()) init, port.args.portTlbSize) - val cacheHits = cache.map(line => line.valid && line.virtualAddress(1) === port.bus.cmd.virtualAddress(31 downto 22) && (line.superPage || line.virtualAddress(0) === port.bus.cmd.virtualAddress(21 downto 12))) - val cacheHit = cacheHits.asBits.orR - val cacheLine = MuxOH(cacheHits, cache) val privilegeService = pipeline.serviceElse(classOf[PrivilegeService], PrivilegeServiceDefault()) - val entryToReplace = Counter(port.args.portTlbSize) - val requireMmuLockup = virtualRange(port.bus.cmd.virtualAddress) && !port.bus.cmd.bypassTranslation && csr.satp.mode + val cache = Vec(Reg(CacheLine()) init, port.args.portTlbSize) + + def toRsp[T <: Data](data : T, from : MemoryTranslatorCmd) : T = from match { + case _ if from == port.bus.cmd.last => data + case _ => { + val next = port.bus.cmd.dropWhile(_ != from)(1) + toRsp(RegNextWhen(data, !next.isStuck), next) + } + } + val requireMmuLockupCmd = port.bus.cmd.takeRight(if(port.args.earlyRequireMmuLockup) 2 else 1).head + + val requireMmuLockupCalc = virtualRange(requireMmuLockupCmd.virtualAddress) && !requireMmuLockupCmd.bypassTranslation && csr.satp.mode if(!enableMmuInMachineMode) { - requireMmuLockup clearWhen(!csr.status.mprv && privilegeService.isMachine()) + requireMmuLockupCalc clearWhen(!csr.status.mprv && privilegeService.isMachine()) when(privilegeService.isMachine()) { if (port.priority == MmuPort.PRIORITY_DATA) { - requireMmuLockup clearWhen (!csr.status.mprv || pipeline(MPP) === 3) + requireMmuLockupCalc clearWhen (!csr.status.mprv || pipeline(MPP) === 3) } else { - requireMmuLockup := False + requireMmuLockupCalc := False } } } + val cacheHitsCmd = port.bus.cmd.takeRight(if(port.args.earlyCacheHits) 2 else 1).head + val cacheHitsCalc = B(cache.map(line => line.valid && line.virtualAddress(1) === cacheHitsCmd.virtualAddress(31 downto 22) && (line.superPage || line.virtualAddress(0) === cacheHitsCmd.virtualAddress(21 downto 12)))) + + + val requireMmuLockup = toRsp(requireMmuLockupCalc, requireMmuLockupCmd) + val cacheHits = toRsp(cacheHitsCalc, cacheHitsCmd) + + val cacheHit = cacheHits.asBits.orR + val cacheLine = MuxOH(cacheHits, cache) + val entryToReplace = Counter(port.args.portTlbSize) + + when(requireMmuLockup) { - port.bus.rsp.physicalAddress := cacheLine.physicalAddress(1) @@ (cacheLine.superPage ? port.bus.cmd.virtualAddress(21 downto 12) | cacheLine.physicalAddress(0)) @@ port.bus.cmd.virtualAddress(11 downto 0) + port.bus.rsp.physicalAddress := cacheLine.physicalAddress(1) @@ (cacheLine.superPage ? 
port.bus.cmd.last.virtualAddress(21 downto 12) | cacheLine.physicalAddress(0)) @@ port.bus.cmd.last.virtualAddress(11 downto 0) port.bus.rsp.allowRead := cacheLine.allowRead || csr.status.mxr && cacheLine.allowExecute port.bus.rsp.allowWrite := cacheLine.allowWrite port.bus.rsp.allowExecute := cacheLine.allowExecute port.bus.rsp.exception := cacheHit && (cacheLine.exception || cacheLine.allowUser && privilegeService.isSupervisor() && !csr.status.sum || !cacheLine.allowUser && privilegeService.isUser()) port.bus.rsp.refilling := !cacheHit } otherwise { - port.bus.rsp.physicalAddress := port.bus.cmd.virtualAddress + port.bus.rsp.physicalAddress := port.bus.cmd.last.virtualAddress port.bus.rsp.allowRead := True port.bus.rsp.allowWrite := True port.bus.rsp.allowExecute := True @@ -141,7 +159,7 @@ class MmuPlugin(ioRange : UInt => Bool, port.bus.rsp.bypassTranslation := !requireMmuLockup for(wayId <- 0 until port.args.portTlbSize){ port.bus.rsp.ways(wayId).sel := cacheHits(wayId) - port.bus.rsp.ways(wayId).physical := cache(wayId).physicalAddress(1) @@ (cache(wayId).superPage ? port.bus.cmd.virtualAddress(21 downto 12) | cache(wayId).physicalAddress(0)) @@ port.bus.cmd.virtualAddress(11 downto 0) + port.bus.rsp.ways(wayId).physical := cache(wayId).physicalAddress(1) @@ (cache(wayId).superPage ? port.bus.cmd.last.virtualAddress(21 downto 12) | cache(wayId).physicalAddress(0)) @@ port.bus.cmd.last.virtualAddress(11 downto 0) } // Avoid keeping any invalid line in the cache after an exception. @@ -186,13 +204,13 @@ class MmuPlugin(ioRange : UInt => Bool, dBusAccess.cmd.data.assignDontCare() dBusAccess.cmd.writeMask.assignDontCare() - val refills = OHMasking.last(B(sortedPortsInfo.map(port => port.bus.cmd.isValid && port.bus.rsp.refilling))) + val refills = OHMasking.last(B(sortedPortsInfo.map(port => port.bus.cmd.last.isValid && port.bus.rsp.refilling))) switch(state){ is(State.IDLE){ when(refills.orR){ portSortedOh := refills state := State.L1_CMD - val address = MuxOH(refills, sortedPortsInfo.map(_.bus.cmd.virtualAddress)) + val address = MuxOH(refills, sortedPortsInfo.map(_.bus.cmd.last.virtualAddress)) vpn(1) := address(31 downto 22) vpn(0) := address(21 downto 12) } diff --git a/src/main/scala/vexriscv/plugin/StaticMemoryTranslatorPlugin.scala b/src/main/scala/vexriscv/plugin/StaticMemoryTranslatorPlugin.scala index cbe55f9..bc910c0 100644 --- a/src/main/scala/vexriscv/plugin/StaticMemoryTranslatorPlugin.scala +++ b/src/main/scala/vexriscv/plugin/StaticMemoryTranslatorPlugin.scala @@ -26,7 +26,7 @@ class StaticMemoryTranslatorPlugin(ioRange : UInt => Bool) extends Plugin[VexRis val core = pipeline plug new Area { val ports = for ((port, portId) <- portsInfo.zipWithIndex) yield new Area { - port.bus.rsp.physicalAddress := port.bus.cmd.virtualAddress + port.bus.rsp.physicalAddress := port.bus.cmd.last.virtualAddress port.bus.rsp.allowRead := True port.bus.rsp.allowWrite := True port.bus.rsp.allowExecute := True diff --git a/src/test/scala/vexriscv/TestIndividualFeatures.scala b/src/test/scala/vexriscv/TestIndividualFeatures.scala index c31a30a..33a3857 100644 --- a/src/test/scala/vexriscv/TestIndividualFeatures.scala +++ b/src/test/scala/vexriscv/TestIndividualFeatures.scala @@ -405,13 +405,12 @@ class DBusDimension extends VexRiscvDimension("DBus") { override def randomPositionImpl(universes: Seq[ConfigUniverse], r: Random) = { val catchAll = universes.contains(VexRiscvUniverse.CATCH_ALL) - val mmuConfig = if(universes.contains(VexRiscvUniverse.MMU)) MmuPortConfig( portTlbSize = 4) else null 
val noMemory = universes.contains(VexRiscvUniverse.NO_MEMORY) val noWriteBack = universes.contains(VexRiscvUniverse.NO_WRITEBACK) - if(r.nextDouble() < 0.4 || noMemory){ + val mmuConfig = if(universes.contains(VexRiscvUniverse.MMU)) MmuPortConfig( portTlbSize = 4, latency = 0) else null val withLrSc = catchAll val earlyInjection = r.nextBoolean() && !universes.contains(VexRiscvUniverse.NO_WRITEBACK) new VexRiscvPosition("Simple" + (if(earlyInjection) "Early" else "Late")) { @@ -426,6 +425,8 @@ class DBusDimension extends VexRiscvDimension("DBus") { // override def isCompatibleWith(positions: Seq[ConfigPosition[VexRiscvConfig]]) = catchAll == positions.exists(_.isInstanceOf[CatchAllPosition]) } } else { + val twoStageMmu = r.nextBoolean() && !noMemory && !noWriteBack + val mmuConfig = if(universes.contains(VexRiscvUniverse.MMU)) MmuPortConfig(portTlbSize = 4, latency = if(twoStageMmu) 1 else 0, earlyRequireMmuLockup = Random.nextBoolean() && twoStageMmu, earlyCacheHits = Random.nextBoolean() && twoStageMmu) else null val memDataWidth = List(32,64,128)(r.nextInt(3)) val bytePerLine = Math.max(memDataWidth/8, List(8,16,32,64)(r.nextInt(4))) var cacheSize = 0 @@ -443,7 +444,7 @@ class DBusDimension extends VexRiscvDimension("DBus") { cacheSize = 512 << r.nextInt(5) wayCount = 1 << r.nextInt(3) }while(cacheSize/wayCount < 512 || (catchAll && cacheSize/wayCount > 4096)) - new VexRiscvPosition(s"Cached${memDataWidth}d" + "S" + cacheSize + "W" + wayCount + "BPL" + bytePerLine + (if(dBusCmdMasterPipe) "Cmp " else "") + (if(dBusCmdSlavePipe) "Csp " else "") + (if(dBusRspSlavePipe) "Rsp " else "") + (if(relaxedMemoryTranslationRegister) "Rmtr " else "") + (if(earlyWaysHits) "Ewh " else "") + (if(withAmo) "Amo " else "") + (if(withSmp) "Smp " else "") + (if(directTlbHit) "Dtlb " else "")) { + new VexRiscvPosition(s"Cached${memDataWidth}d" + "S" + cacheSize + "W" + wayCount + "BPL" + bytePerLine + (if(dBusCmdMasterPipe) "Cmp " else "") + (if(dBusCmdSlavePipe) "Csp " else "") + (if(dBusRspSlavePipe) "Rsp " else "") + (if(relaxedMemoryTranslationRegister) "Rmtr " else "") + (if(earlyWaysHits) "Ewh " else "") + (if(withAmo) "Amo " else "") + (if(withSmp) "Smp " else "") + (if(directTlbHit) "Dtlb " else "") + (if(twoStageMmu) "Tsmmu " else "")) { override def testParam = s"DBUS=CACHED DBUS_DATA_WIDTH=$memDataWidth " + (if(withLrSc) "LRSC=yes " else "") + (if(withAmo) "AMO=yes " else "") + (if(withSmp) "DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes " else "") override def applyOn(config: VexRiscvConfig): Unit = { From 0e76cf9ac8e385de06b53b98648d552e263e3440 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Thu, 7 May 2020 22:50:25 +0200 Subject: [PATCH 60/91] i$ now support multi cycle MMU --- .../scala/vexriscv/ip/InstructionCache.scala | 20 ++++------- .../vexriscv/plugin/IBusCachedPlugin.scala | 35 ++++++++++++------- .../vexriscv/TestIndividualFeatures.scala | 8 ++++- 3 files changed, 37 insertions(+), 26 deletions(-) diff --git a/src/main/scala/vexriscv/ip/InstructionCache.scala b/src/main/scala/vexriscv/ip/InstructionCache.scala index 6053c0f..43f5130 100644 --- a/src/main/scala/vexriscv/ip/InstructionCache.scala +++ b/src/main/scala/vexriscv/ip/InstructionCache.scala @@ -112,16 +112,15 @@ case class InstructionCacheCpuFetch(p : InstructionCacheConfig, mmuParameter : M val data = Bits(p.cpuDataWidth bits) val dataBypassValid = p.bypassGen generate Bool() val dataBypass = p.bypassGen generate Bits(p.cpuDataWidth bits) - val mmuBus = MemoryTranslatorBus(mmuParameter) + val mmuRsp = 
MemoryTranslatorRsp(mmuParameter) val physicalAddress = UInt(p.addressWidth bits) val cacheMiss, error, mmuRefilling, mmuException, isUser = ifGen(!p.twoCycleCache)(Bool) - val haltIt = Bool() //Used to wait on the MMU rsp busy override def asMaster(): Unit = { out(isValid, isStuck, isRemoved, pc) - inWithNull(error,mmuRefilling,mmuException,data, cacheMiss,physicalAddress, haltIt) + inWithNull(error,mmuRefilling,mmuException,data, cacheMiss,physicalAddress) outWithNull(isUser, dataBypass, dataBypassValid) - slaveWithNull(mmuBus) + out(mmuRsp) } } @@ -321,7 +320,6 @@ class InstructionCache(p : InstructionCacheConfig, mmuParameter : MemoryTranslat } }) - io.cpu.fetch.haltIt := io.cpu.fetch.mmuBus.busy val lineLoader = new Area{ val fire = False @@ -412,7 +410,7 @@ class InstructionCache(p : InstructionCacheConfig, mmuParameter : MemoryTranslat val hit = (!twoCycleRam) generate new Area{ - val hits = read.waysValues.map(way => way.tag.valid && way.tag.address === io.cpu.fetch.mmuBus.rsp.physicalAddress(tagRange)) + val hits = read.waysValues.map(way => way.tag.valid && way.tag.address === io.cpu.fetch.mmuRsp.physicalAddress(tagRange)) val valid = Cat(hits).orR val id = OHToUInt(hits) val error = read.waysValues.map(_.tag.error).read(id) @@ -429,14 +427,10 @@ class InstructionCache(p : InstructionCacheConfig, mmuParameter : MemoryTranslat io.cpu.fetch.data := (if(p.bypassGen) (io.cpu.fetch.dataBypassValid ? io.cpu.fetch.dataBypass | cacheData) else cacheData) } - io.cpu.fetch.mmuBus.cmd.last.isValid := io.cpu.fetch.isValid - io.cpu.fetch.mmuBus.cmd.last.virtualAddress := io.cpu.fetch.pc - io.cpu.fetch.mmuBus.cmd.last.bypassTranslation := False - io.cpu.fetch.mmuBus.end := !io.cpu.fetch.isStuck || io.cpu.fetch.isRemoved - io.cpu.fetch.physicalAddress := io.cpu.fetch.mmuBus.rsp.physicalAddress + io.cpu.fetch.physicalAddress := io.cpu.fetch.mmuRsp.physicalAddress val resolution = ifGen(!twoCycleCache)( new Area{ - val mmuRsp = io.cpu.fetch.mmuBus.rsp + val mmuRsp = io.cpu.fetch.mmuRsp io.cpu.fetch.cacheMiss := !hit.valid io.cpu.fetch.error := hit.error @@ -449,7 +443,7 @@ class InstructionCache(p : InstructionCacheConfig, mmuParameter : MemoryTranslat val decodeStage = ifGen(twoCycleCache) (new Area{ def stage[T <: Data](that : T) = RegNextWhen(that,!io.cpu.decode.isStuck) - val mmuRsp = stage(io.cpu.fetch.mmuBus.rsp) + val mmuRsp = stage(io.cpu.fetch.mmuRsp) val hit = if(!twoCycleRam) new Area{ val valid = stage(fetchStage.hit.valid) diff --git a/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala b/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala index 4d41790..e23cec7 100644 --- a/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala +++ b/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala @@ -155,8 +155,13 @@ class IBusCachedPlugin(resetVector : BigInt = 0x80000000l, cache.io.cpu.prefetch.pc := stages(0).input.payload stages(0).halt setWhen (cache.io.cpu.prefetch.haltIt) - - cache.io.cpu.fetch.isRemoved := externalFlush + if(mmuBus != null && mmuBus.p.latency == 1) { + stages(0).halt setWhen(mmuBus.busy) + mmuBus.cmd(0).isValid := cache.io.cpu.prefetch.isValid + mmuBus.cmd(0).isStuck := !stages(0).input.ready + mmuBus.cmd(0).virtualAddress := cache.io.cpu.prefetch.pc + mmuBus.cmd(0).bypassTranslation := False + } } @@ -172,8 +177,15 @@ class IBusCachedPlugin(resetVector : BigInt = 0x80000000l, cache.io.cpu.fetch.isStuck := !stages(1).input.ready cache.io.cpu.fetch.pc := stages(1).input.payload + if(mmuBus != null) { + mmuBus.cmd.last.isValid := cache.io.cpu.fetch.isValid + 
mmuBus.cmd.last.isStuck := !stages(1).input.ready + mmuBus.cmd.last.virtualAddress := cache.io.cpu.fetch.pc + mmuBus.cmd.last.bypassTranslation := False + mmuBus.end := stages(1).input.ready || externalFlush + if (mmuBus.p.latency == 0) stages(1).halt setWhen (mmuBus.busy) + } - stages(1).halt setWhen(cache.io.cpu.fetch.haltIt) if (!twoCycleCache) { cache.io.cpu.fetch.isUser := privilegeService.isUser() @@ -249,16 +261,15 @@ class IBusCachedPlugin(resetVector : BigInt = 0x80000000l, } if (mmuBus != null) { - cache.io.cpu.fetch.mmuBus <> mmuBus + cache.io.cpu.fetch.mmuRsp <> mmuBus.rsp } else { - cache.io.cpu.fetch.mmuBus.rsp.physicalAddress := cache.io.cpu.fetch.mmuBus.cmd.last.virtualAddress - cache.io.cpu.fetch.mmuBus.rsp.allowExecute := True - cache.io.cpu.fetch.mmuBus.rsp.allowRead := True - cache.io.cpu.fetch.mmuBus.rsp.allowWrite := True - cache.io.cpu.fetch.mmuBus.rsp.isIoAccess := False - cache.io.cpu.fetch.mmuBus.rsp.exception := False - cache.io.cpu.fetch.mmuBus.rsp.refilling := False - cache.io.cpu.fetch.mmuBus.busy := False + cache.io.cpu.fetch.mmuRsp.physicalAddress := cache.io.cpu.fetch.pc + cache.io.cpu.fetch.mmuRsp.allowExecute := True + cache.io.cpu.fetch.mmuRsp.allowRead := True + cache.io.cpu.fetch.mmuRsp.allowWrite := True + cache.io.cpu.fetch.mmuRsp.isIoAccess := False + cache.io.cpu.fetch.mmuRsp.exception := False + cache.io.cpu.fetch.mmuRsp.refilling := False } val flushStage = decode diff --git a/src/test/scala/vexriscv/TestIndividualFeatures.scala b/src/test/scala/vexriscv/TestIndividualFeatures.scala index 33a3857..66b308b 100644 --- a/src/test/scala/vexriscv/TestIndividualFeatures.scala +++ b/src/test/scala/vexriscv/TestIndividualFeatures.scala @@ -321,9 +321,12 @@ class IBusDimension(rvcRate : Double) extends VexRiscvDimension("IBus") { override def randomPositionImpl(universes: Seq[ConfigUniverse], r: Random) = { val catchAll = universes.contains(VexRiscvUniverse.CATCH_ALL) - val mmuConfig = if(universes.contains(VexRiscvUniverse.MMU)) MmuPortConfig( portTlbSize = 4) else null + val noMemory = universes.contains(VexRiscvUniverse.NO_MEMORY) + val noWriteBack = universes.contains(VexRiscvUniverse.NO_WRITEBACK) + if(r.nextDouble() < 0.5){ + val mmuConfig = if(universes.contains(VexRiscvUniverse.MMU)) MmuPortConfig( portTlbSize = 4) else null val latency = r.nextInt(5) + 1 val compressed = r.nextDouble() < rvcRate val injectorStage = r.nextBoolean() || latency == 1 @@ -347,6 +350,9 @@ class IBusDimension(rvcRate : Double) extends VexRiscvDimension("IBus") { override def instructionAnticipatedOk() = injectorStage } } else { + val twoStageMmu = r.nextBoolean() + val mmuConfig = if(universes.contains(VexRiscvUniverse.MMU)) MmuPortConfig(portTlbSize = 4, latency = if(twoStageMmu) 1 else 0, earlyRequireMmuLockup = Random.nextBoolean() && twoStageMmu, earlyCacheHits = Random.nextBoolean() && twoStageMmu) else null + val catchAll = universes.contains(VexRiscvUniverse.CATCH_ALL) val compressed = r.nextDouble() < rvcRate val tighlyCoupled = r.nextBoolean() && !catchAll From 0a159f06b23d59a567c3b28aaa3abc68cd87c26b Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Thu, 7 May 2020 22:50:36 +0200 Subject: [PATCH 61/91] update smp config --- src/main/scala/vexriscv/TestsWorkspace.scala | 10 +++++++--- .../scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala | 11 ++++++++--- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/main/scala/vexriscv/TestsWorkspace.scala b/src/main/scala/vexriscv/TestsWorkspace.scala index b97936d..e25b2b7 100644 --- 
a/src/main/scala/vexriscv/TestsWorkspace.scala +++ b/src/main/scala/vexriscv/TestsWorkspace.scala @@ -68,13 +68,15 @@ object TestsWorkspace { catchIllegalAccess = true, catchAccessFault = true, asyncTagMemory = false, - twoCycleRam = false, + twoCycleRam = true, twoCycleCache = true // ) ), memoryTranslatorPortConfig = MmuPortConfig( portTlbSize = 4, - latency = 0 + latency = 1, + earlyRequireMmuLockup = true, + earlyCacheHits = true ) ), // ).newTightlyCoupledPort(TightlyCoupledPortParameter("iBusTc", a => a(30 downto 28) === 0x0 && a(5))), @@ -110,7 +112,9 @@ object TestsWorkspace { ), memoryTranslatorPortConfig = MmuPortConfig( portTlbSize = 4, - latency = 1 + latency = 1, + earlyRequireMmuLockup = true, + earlyCacheHits = true ) ), diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala index 3f23a23..6efdf8f 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -9,7 +9,7 @@ import spinal.lib.bus.bmb.{Bmb, BmbArbiter, BmbDecoder, BmbExclusiveMonitor, Bmb import spinal.lib.com.jtag.Jtag import spinal.lib.com.jtag.sim.JtagTcp import vexriscv.ip.{DataCacheAck, DataCacheConfig, DataCacheMemBus, InstructionCache, InstructionCacheConfig} -import vexriscv.plugin.{BranchPlugin, CsrPlugin, CsrPluginConfig, DBusCachedPlugin, DBusSimplePlugin, DebugPlugin, DecoderSimplePlugin, FullBarrelShifterPlugin, HazardSimplePlugin, IBusCachedPlugin, IBusSimplePlugin, IntAluPlugin, MmuPlugin, MmuPortConfig, MulDivIterativePlugin, MulPlugin, RegFilePlugin, STATIC, SrcPlugin, YamlPlugin} +import vexriscv.plugin.{BranchPlugin, CsrPlugin, CsrPluginConfig, DBusCachedPlugin, DBusSimplePlugin, DYNAMIC_TARGET, DebugPlugin, DecoderSimplePlugin, FullBarrelShifterPlugin, HazardSimplePlugin, IBusCachedPlugin, IBusSimplePlugin, IntAluPlugin, MmuPlugin, MmuPortConfig, MulDivIterativePlugin, MulPlugin, RegFilePlugin, STATIC, SrcPlugin, YamlPlugin} import vexriscv.{Riscv, VexRiscv, VexRiscvConfig, plugin} import scala.collection.mutable @@ -141,6 +141,8 @@ object VexRiscvSmpClusterGen { resetVector = resetVector, compressedGen = false, prediction = STATIC, + historyRamSizeLog2 = 9, + relaxPredictorAddress = true, injectorStage = false, relaxedPcCalculation = true, config = InstructionCacheConfig( @@ -153,12 +155,15 @@ object VexRiscvSmpClusterGen { catchIllegalAccess = true, catchAccessFault = true, asyncTagMemory = false, - twoCycleRam = true, + twoCycleRam = false, twoCycleCache = true // ) ), memoryTranslatorPortConfig = MmuPortConfig( - portTlbSize = 4 + portTlbSize = 4, + latency = 1, + earlyRequireMmuLockup = true, + earlyCacheHits = true ) ), // ).newTightlyCoupledPort(TightlyCoupledPortParameter("iBusTc", a => a(30 downto 28) === 0x0 && a(5))), From b592b0bff8d434dbcca4dc204d2b7bec2729adb4 Mon Sep 17 00:00:00 2001 From: Charles Papon Date: Sat, 9 May 2020 17:00:13 +0200 Subject: [PATCH 62/91] Add regression TRACE_SPORADIC, LINUX_SOC_SMP regression golden model now properly sync dut exceptions --- src/main/scala/vexriscv/TestsWorkspace.scala | 3 +- .../scala/vexriscv/plugin/CsrPlugin.scala | 3 +- src/test/cpp/regression/main.cpp | 124 +++++++++++++++++- src/test/cpp/regression/makefile | 16 +++ 4 files changed, 137 insertions(+), 9 deletions(-) diff --git a/src/main/scala/vexriscv/TestsWorkspace.scala b/src/main/scala/vexriscv/TestsWorkspace.scala index e25b2b7..8a78544 100644 --- a/src/main/scala/vexriscv/TestsWorkspace.scala +++ 
b/src/main/scala/vexriscv/TestsWorkspace.scala @@ -28,6 +28,7 @@ import spinal.lib.eda.altera.{InterruptReceiverTag, ResetEmitterTag} // make clean all SEED=42 MMU=no STOP_ON_ERROR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes SUPERVISOR=yes REDO=1 DHRYSTONE=yes LRSC=yes AMO=yes LINUX_REGRESSION=yes TRACE=yes TRACE_START=1000000000 FLOW_INFO=ye IBUS_DATA_WIDTH=128 DBUS_DATA_WIDTH=128 +//make clean all SEED=42 MMU=no STOP_ON_ERROR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes SUPERVISOR=yes REDO=1 DHRYSTONE=yes LRSC=yes AMO=yes TRACE=yes TRACE_START=1000000000 FLOW_INFO=ye IBUS_DATA_WIDTH=128 DBUS_DATA_WIDTH=128 LINUX_SOC_SMP=yes VMLINUX=../../../../../buildroot/output/images/Image RAMDISK=../../../../../buildroot/output/images/rootfs.cpio DTB=../../../../../buildroot/output/images/dtb EMULATOR=../../../../../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_jump.bin object TestsWorkspace { def main(args: Array[String]) { def configFull = { @@ -156,7 +157,7 @@ object TestsWorkspace { divUnrollFactor = 1 ), // new DivPlugin, - new CsrPlugin(CsrPluginConfig.all2(0x80000020l).copy(ebreakGen = false)), + new CsrPlugin(CsrPluginConfig.all2(0x80000020l).copy(ebreakGen = false, misaExtensionsInit = Riscv.misaToInt("imas"))), // new CsrPlugin(//CsrPluginConfig.all2(0x80000020l).copy(ebreakGen = true)/* // CsrPluginConfig( // catchIllegalAccess = false, diff --git a/src/main/scala/vexriscv/plugin/CsrPlugin.scala b/src/main/scala/vexriscv/plugin/CsrPlugin.scala index bb56c3e..43dbfaf 100644 --- a/src/main/scala/vexriscv/plugin/CsrPlugin.scala +++ b/src/main/scala/vexriscv/plugin/CsrPlugin.scala @@ -8,6 +8,7 @@ import vexriscv.plugin.IntAluPlugin.{ALU_BITWISE_CTRL, ALU_CTRL, AluBitwiseCtrlE import scala.collection.mutable.ArrayBuffer import scala.collection.mutable +import spinal.core.sim._ /** * Created by spinalvm on 21.03.17. 
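The hunk below tags the CsrPlugin `hadException` register with `Verilator.public`, which is what lets the regression testbench step the golden model in sync with DUT traps (see the `top->VexRiscv->CsrPlugin_hadException` access added to main.cpp further down in this patch). A minimal sketch of the same pattern, with a made-up component and signal purely for illustration:

    import spinal.core._
    import spinal.core.sim._

    // Illustration only (not part of this patch): keep a trap flag readable
    // from the Verilator testbench, same pattern as CsrPlugin_hadException.
    class TrapProbe extends Component {
      val trap = in Bool()
      val hadException = RegNext(trap) init(False) addTag(Verilator.public)
      // spinal.core.sim also offers .simPublic() for the same purpose.
    }

On the C++ side such a tagged signal becomes a public member of the Verilated model, which is the mechanism the main.cpp change in this patch relies on.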
@@ -874,7 +875,7 @@ class CsrPlugin(val config: CsrPluginConfig) extends Plugin[VexRiscv] with Excep interruptJump := interrupt.valid && pipelineLiberator.done && allowInterrupts if(pipelinedInterrupt) interrupt.valid clearWhen(interruptJump) //avoid double fireing - val hadException = RegNext(exception) init(False) + val hadException = RegNext(exception) init(False) addTag(Verilator.public) pipelineLiberator.done.clearWhen(hadException) diff --git a/src/test/cpp/regression/main.cpp b/src/test/cpp/regression/main.cpp index e0f50ab..717e534 100644 --- a/src/test/cpp/regression/main.cpp +++ b/src/test/cpp/regression/main.cpp @@ -390,7 +390,7 @@ public: mcause.raw = 0; mbadaddr = 0; mepc = 0; - misa = 0; //TODO + misa = 0x40041101; //TODO status.raw = 0; status.mpp = 3; status.spp = 1; @@ -401,6 +401,7 @@ public: ipSoft = 0; ipInput = 0; stepCounter = 0; + sbadaddr = 42; lrscReserved = false; } @@ -513,7 +514,7 @@ public: pcWrite(xtvec.base << 2); if(interrupt) livenessInterrupt = 0; - if(!interrupt) step(); //As VexRiscv instruction which trap do not reach writeback stage fire +// if(!interrupt) step(); //As VexRiscv instruction which trap do not reach writeback stage fire } uint32_t currentInstruction; @@ -540,6 +541,7 @@ public: case MISA: *value = misa; break; case MEDELEG: *value = medeleg; break; case MIDELEG: *value = mideleg; break; + case MHARTID: *value = 0; break; case SSTATUS: *value = status.raw & 0xC0133; break; case SIP: *value = getIp().raw & 0x333; break; @@ -578,7 +580,7 @@ public: case MEPC: mepc = value; break; case MSCRATCH: mscratch = value; break; case MISA: misa = value; break; - case MEDELEG: medeleg = value; break; + case MEDELEG: medeleg = value & (~0x8); break; case MIDELEG: mideleg = value; break; case SSTATUS: maskedWrite(status.raw, value,0xC0133); break; @@ -1259,7 +1261,7 @@ public: top = new VVexRiscv; #ifdef TRACE_ACCESS regTraces.open (name + ".regTrace"); - memTraces.open (name + ".memTrace");hh + memTraces.open (name + ".memTrace"); #endif logTraces.open (name + ".logTrace"); debugLog.open (name + ".debugTrace"); @@ -1342,7 +1344,7 @@ public: #endif ) << #endif - " : WRITE mem" << (1 << size) << "[" << addr << "] = " << *data << endl; + " : WRITE mem" << hex << (1 << size) << "[" << addr << "] = " << *data << dec << endl; for(uint32_t b = 0;b < (1 << size);b++){ uint32_t offset = (addr+b)&0x3; if((mask >> offset) & 1 == 1) @@ -1356,6 +1358,7 @@ public: *data &= ~(0xFF << (offset*8)); *data |= mem[addr + b] << (offset*8); } + /* memTraces << #ifdef TRACE_WITH_TIME (currentTime @@ -1364,7 +1367,7 @@ public: #endif ) << #endif - " : READ mem" << (1 << size) << "[" << addr << "] = " << *data << endl; + " : READ mem" << (1 << size) << "[" << addr << "] = " << *data << endl;*/ } } @@ -1430,6 +1433,9 @@ public: #ifdef TRACE if(i == TRACE_START && i != 0) cout << "**" << endl << "**" << endl << "**" << endl << "**" << endl << "**" << endl << "START TRACE" << endl; if(i >= TRACE_START) tfp->dump(i); + #ifdef TRACE_SPORADIC + else if(i % 1000000 < 100) tfp->dump(i); + #endif #endif } @@ -1624,6 +1630,14 @@ public: } } + #ifdef CSR + if(top->VexRiscv->CsrPlugin_hadException){ + if(riscvRefEnable) { + riscvRef.step(); + } + } + #endif + for(SimElement* simElement : simElements) simElement->preCycle(); dump(i + 1); @@ -3451,7 +3465,6 @@ public: } }; - class LinuxRegression: public LinuxSoc{ public: string pendingLine = ""; @@ -3484,6 +3497,82 @@ public: #endif +#ifdef LINUX_SOC_SMP + +class LinuxSocSmp : public Workspace{ +public: + queue customCin; + void 
pushCin(string m){ + for(char& c : m) { + customCin.push(c); + } + } + + LinuxSocSmp(string name) : Workspace(name) { + #ifdef WITH_USER_IO + stdinNonBuffered(); + captureCtrlC(); + #endif + stdoutNonBuffered(); + } + + virtual ~LinuxSocSmp(){ + #ifdef WITH_USER_IO + stdinRestore(); + #endif + } + virtual bool isDBusCheckedRegion(uint32_t address){ return true;} + virtual bool isPerifRegion(uint32_t addr) { return (addr & 0xF0000000) == 0xF0000000;} + virtual bool isMmuRegion(uint32_t addr) { return true; } + + + + virtual void dBusAccess(uint32_t addr,bool wr, uint32_t size,uint32_t mask, uint32_t *data, bool *error) { + if(isPerifRegion(addr)) switch(addr){ + //TODO Emulate peripherals here + case 0xF0010000: if(wr && *data != 0) fail(); else *data = 0; break; + case 0xF001BFF8: if(wr) fail(); else *data = mTime; break; + case 0xF001BFFC: if(wr) fail(); else *data = mTime >> 32; break; + case 0xF0014000: if(wr) mTimeCmp = (mTimeCmp & 0xFFFFFFFF00000000) | *data; else fail(); break; + case 0xF0014004: if(wr) mTimeCmp = (mTimeCmp & 0x00000000FFFFFFFF) | (((uint64_t)*data) << 32); else fail(); break; + case 0xF0000000: + if(wr){ + char c = (char)*data; + cout << c; + logTraces << c; + logTraces.flush(); + onStdout(c); + } + case 0xF0000004: + if(!wr){ + #ifdef WITH_USER_IO + if(stdinNonEmpty()){ + char c; + read(0, &c, 1); + *data = c; + } else + #endif + if(!customCin.empty()){ + *data = customCin.front(); + customCin.pop(); + } else { + *data = -1; + } + } + break; + default: cout << "Unmapped peripheral access : addr=0x" << hex << addr << " wr=" << wr << " mask=0x" << mask << " data=0x" << data << dec << endl; fail(); break; + } + + Workspace::dBusAccess(addr,wr,size,mask,data,error); + } + + virtual void onStdout(char c){ + + } +}; + +#endif + string riscvTestMain[] = { //"rv32ui-p-simple", "rv32ui-p-lui", @@ -3840,6 +3929,27 @@ int main(int argc, char **argv, char **env) { #endif +#ifdef LINUX_SOC_SMP + { + + LinuxSocSmp soc("linuxSmp"); + #ifndef DEBUG_PLUGIN_EXTERNAL + soc.withRiscvRef(); + soc.loadBin(EMULATOR, 0x80000000); + soc.loadBin(VMLINUX, 0xC0000000); + soc.loadBin(DTB, 0xC4000000); + soc.loadBin(RAMDISK, 0xC2000000); + #endif + //soc.setIStall(true); + //soc.setDStall(true); + soc.bootAt(0x80000000); + soc.run(0); +// soc.run((496300000l + 2000000) / 2); +// soc.run(438700000l/2); + return -1; + } +#endif + diff --git a/src/test/cpp/regression/makefile b/src/test/cpp/regression/makefile index da525c5..c7dcf5f 100644 --- a/src/test/cpp/regression/makefile +++ b/src/test/cpp/regression/makefile @@ -9,6 +9,7 @@ DBUS_DATA_WIDTH?=32 TRACE?=no TRACE_ACCESS?=no TRACE_START=0 +TRACE_SPORADIC?=no ISA_TEST?=yes MUL?=yes DIV?=yes @@ -84,6 +85,15 @@ ifeq ($(LINUX_SOC),yes) ADDCFLAGS += -CFLAGS -DEMULATOR='\"$(EMULATOR)\"' endif +ifeq ($(LINUX_SOC_SMP),yes) + ADDCFLAGS += -CFLAGS -DLINUX_SOC_SMP + ADDCFLAGS += -CFLAGS -DVMLINUX='\"$(VMLINUX)\"' + ADDCFLAGS += -CFLAGS -DDTB='\"$(DTB)\"' + ADDCFLAGS += -CFLAGS -DRAMDISK='\"$(RAMDISK)\"' + ADDCFLAGS += -CFLAGS -DEMULATOR='\"$(EMULATOR)\"' +endif + + ARCH_LINUX=rv32i ifeq ($(MUL),yes) ifeq ($(DIV),yes) @@ -187,6 +197,12 @@ ifeq ($(TRACE),yes) ADDCFLAGS += -CFLAGS -DTRACE endif +ifeq ($(TRACE_SPORADIC),yes) + ADDCFLAGS += -CFLAGS -DTRACE_SPORADIC +endif + + + ifeq ($(CSR),yes) ADDCFLAGS += -CFLAGS -DCSR endif From 63511b19a2fdc468aec5a11ec92cd2030b4b6cb0 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Mon, 11 May 2020 10:35:24 +0200 Subject: [PATCH 63/91] smp cluster add more profiling --- .../vexriscv/demo/smp/VexRiscvSmpCluster.scala | 16 
++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala index 6efdf8f..1778687 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -533,6 +533,9 @@ object VexRiscvSmpClusterOpenSbi extends App{ var iMemReadBytes, dMemReadBytes, dMemWriteBytes, iMemSequencial,iMemRequests = 0l var reportTimer = 0 var reportCycle = 0 + var dMemWrites, dMemWritesCached = 0l + var dMemWriteCacheAddress = 0l + val dMemWriteCacheMask = ~((1 << log2Up(128/8))-1) import java.io._ val csv = new PrintWriter(new File("bench.csv" )) @@ -540,7 +543,6 @@ object VexRiscvSmpClusterOpenSbi extends App{ var sequencialPrediction = 0l val cache = dut.cpus(i).core.children.find(_.isInstanceOf[InstructionCache]).head.asInstanceOf[InstructionCache].io.cpu.decode }) - csv.write(s"reportCycle,iMemReadBytes,dMemReadBytes,dMemWriteBytes,miaou,asd\n") dut.clockDomain.onSamplings{ for(i <- 0 until cpuCount; iMem = dut.io.iMems(i); ctx = iMemCtx(i)){ // if(iMem.cmd.valid.toBoolean && iMem.cmd.ready.toBoolean){ @@ -568,6 +570,13 @@ object VexRiscvSmpClusterOpenSbi extends App{ if(dut.io.dMem.cmd.valid.toBoolean && dut.io.dMem.cmd.ready.toBoolean){ if(dut.io.dMem.cmd.opcode.toInt == Bmb.Cmd.Opcode.WRITE){ dMemWriteBytes += dut.io.dMem.cmd.length.toInt+1 + val address = dut.io.dMem.cmd.address.toLong + dMemWrites += 1 + if((address & dMemWriteCacheMask) == (dMemWriteCacheAddress & dMemWriteCacheMask)){ + dMemWritesCached += 1 + } else { + dMemWriteCacheAddress = address + } }else { dMemReadBytes += dut.io.dMem.cmd.length.toInt+1 } @@ -578,7 +587,8 @@ object VexRiscvSmpClusterOpenSbi extends App{ reportTimer = 0 // println(f"\n** c=${reportCycle} ir=${iMemReadBytes*1e-6}%5.2f dr=${dMemReadBytes*1e-6}%5.2f dw=${dMemWriteBytes*1e-6}%5.2f **\n") - csv.write(s"$reportCycle,$iMemReadBytes,$dMemReadBytes,$dMemWriteBytes,$iMemRequests,$iMemSequencial\n") + + csv.write(s"$reportCycle,$iMemReadBytes,$dMemReadBytes,$dMemWriteBytes,$iMemRequests,$iMemSequencial,$dMemWrites,$dMemWritesCached\n") csv.flush() reportCycle = 0 iMemReadBytes = 0 @@ -586,6 +596,8 @@ object VexRiscvSmpClusterOpenSbi extends App{ dMemWriteBytes = 0 iMemRequests = 0 iMemSequencial = 0 + dMemWrites = 0 + dMemWritesCached = 0 } } From cb44a474fcb2e1ef99c39495e5481c947014de70 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Tue, 12 May 2020 13:25:55 +0200 Subject: [PATCH 64/91] more smp cluster profiling --- .../demo/smp/VexRiscvSmpCluster.scala | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala index 1778687..b960643 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -534,8 +534,11 @@ object VexRiscvSmpClusterOpenSbi extends App{ var reportTimer = 0 var reportCycle = 0 var dMemWrites, dMemWritesCached = 0l - var dMemWriteCacheAddress = 0l - val dMemWriteCacheMask = ~((1 << log2Up(128/8))-1) + val dMemWriteCacheCtx = List(4,8,16,32,64).map(bytes => new { + var counter = 0l + var address = 0l + val mask = ~((1 << log2Up(bytes))-1) + }) import java.io._ val csv = new PrintWriter(new File("bench.csv" )) @@ -572,13 +575,16 @@ object VexRiscvSmpClusterOpenSbi extends App{ dMemWriteBytes += dut.io.dMem.cmd.length.toInt+1 val 
address = dut.io.dMem.cmd.address.toLong dMemWrites += 1 - if((address & dMemWriteCacheMask) == (dMemWriteCacheAddress & dMemWriteCacheMask)){ - dMemWritesCached += 1 - } else { - dMemWriteCacheAddress = address + for(ctx <- dMemWriteCacheCtx){ + if((address & ctx.mask) == (ctx.address & ctx.mask)){ + ctx.counter += 1 + } else { + ctx.address = address + } } }else { dMemReadBytes += dut.io.dMem.cmd.length.toInt+1 + for(ctx <- dMemWriteCacheCtx) ctx.address = -1 } } reportTimer = reportTimer + 1 @@ -588,7 +594,7 @@ object VexRiscvSmpClusterOpenSbi extends App{ // println(f"\n** c=${reportCycle} ir=${iMemReadBytes*1e-6}%5.2f dr=${dMemReadBytes*1e-6}%5.2f dw=${dMemWriteBytes*1e-6}%5.2f **\n") - csv.write(s"$reportCycle,$iMemReadBytes,$dMemReadBytes,$dMemWriteBytes,$iMemRequests,$iMemSequencial,$dMemWrites,$dMemWritesCached\n") + csv.write(s"$reportCycle,$iMemReadBytes,$dMemReadBytes,$dMemWriteBytes,$iMemRequests,$iMemSequencial,$dMemWrites,${dMemWriteCacheCtx.map(_.counter).mkString(",")}\n") csv.flush() reportCycle = 0 iMemReadBytes = 0 @@ -597,7 +603,7 @@ object VexRiscvSmpClusterOpenSbi extends App{ iMemRequests = 0 iMemSequencial = 0 dMemWrites = 0 - dMemWritesCached = 0 + for(ctx <- dMemWriteCacheCtx) ctx.counter = 0 } } From 0471c7ad7609f960a2220c2e184c129924643283 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Tue, 12 May 2020 23:55:47 +0200 Subject: [PATCH 65/91] Fix machineCsr test --- .../cpp/raw/machineCsr/build/machineCsr.asm | 88 ++++--- .../cpp/raw/machineCsr/build/machineCsr.hex | 20 +- .../machineCsr/build/machineCsrCompressed.asm | 240 ++++++++++-------- .../machineCsr/build/machineCsrCompressed.hex | 58 +++-- src/test/cpp/raw/machineCsr/src/crt.S | 14 + 5 files changed, 239 insertions(+), 181 deletions(-) diff --git a/src/test/cpp/raw/machineCsr/build/machineCsr.asm b/src/test/cpp/raw/machineCsr/build/machineCsr.asm index 4d80d75..679be70 100644 --- a/src/test/cpp/raw/machineCsr/build/machineCsr.asm +++ b/src/test/cpp/raw/machineCsr/build/machineCsr.asm @@ -29,26 +29,26 @@ Disassembly of section .crt_section: 80000044: 01de7f33 and t5,t3,t4 80000048: 000f1863 bnez t5,80000058 8000004c: 34102ef3 csrr t4,mepc -80000050: 004e8e93 addi t4,t4,4 # 80000004 +80000050: 004e8e93 addi t4,t4,4 # 80000004 80000054: 341e9073 csrw mepc,t4 80000058 : 80000058: 80000eb7 lui t4,0x80000 -8000005c: 003e8e93 addi t4,t4,3 # 80000003 +8000005c: 003e8e93 addi t4,t4,3 # 80000003 80000060: 01ce9863 bne t4,t3,80000070 80000064: f0013c37 lui s8,0xf0013 80000068: 00000c93 li s9,0 -8000006c: 019c2023 sw s9,0(s8) # f0013000 +8000006c: 019c2023 sw s9,0(s8) # f0013000 80000070 : 80000070: 80000eb7 lui t4,0x80000 -80000074: 007e8e93 addi t4,t4,7 # 80000007 +80000074: 007e8e93 addi t4,t4,7 # 80000007 80000078: 01ce9463 bne t4,t3,80000080 8000007c: 30405073 csrwi mie,0 80000080 : 80000080: 80000eb7 lui t4,0x80000 -80000084: 00be8e93 addi t4,t4,11 # 8000000b +80000084: 00be8e93 addi t4,t4,11 # 8000000b 80000088: 01ce9463 bne t4,t3,80000090 8000008c: 30405073 csrwi mie,0 @@ -65,7 +65,7 @@ Disassembly of section .crt_section: 800000ac: 30429073 csrw mie,t0 800000b0: f0013c37 lui s8,0xf0013 800000b4: 00100c93 li s9,1 -800000b8: 019c2023 sw s9,0(s8) # f0013000 +800000b8: 019c2023 sw s9,0(s8) # f0013000 800000bc: 00000013 nop 800000c0: 00000013 nop 800000c4: 00000013 nop @@ -101,41 +101,55 @@ Disassembly of section .crt_section: 8000013c: 00000013 nop 80000140: 00500e13 li t3,5 80000144: f01001b7 lui gp,0xf0100 -80000148: f4018193 addi gp,gp,-192 # f00fff40 +80000148: f4018193 addi gp,gp,-192 # f00fff40 8000014c: 
0001a203 lw tp,0(gp) 80000150: 0041a283 lw t0,4(gp) 80000154: 3ff20213 addi tp,tp,1023 # 3ff 80000158: 0041a423 sw tp,8(gp) 8000015c: 0051a623 sw t0,12(gp) -80000160: 00600e13 li t3,6 -80000164: 08000213 li tp,128 -80000168: 30421073 csrw mie,tp -8000016c: 00700e13 li t3,7 -80000170: 10500073 wfi -80000174: 00800e13 li t3,8 -80000178: 00100193 li gp,1 -8000017c: 0041a023 sw tp,0(gp) -80000180: 00900e13 li t3,9 -80000184: 00419023 sh tp,0(gp) -80000188: 00a00e13 li t3,10 -8000018c: 0001a203 lw tp,0(gp) -80000190: 00b00e13 li t3,11 -80000194: 00019203 lh tp,0(gp) -80000198: 00c00e13 li t3,12 -8000019c: 00d00e13 li t3,13 -800001a0: 00002083 lw ra,0(zero) # 0 +80000160: 00000013 nop +80000164: 00000013 nop +80000168: 00000013 nop +8000016c: 00000013 nop +80000170: 00000013 nop +80000174: 00000013 nop +80000178: 00000013 nop +8000017c: 00000013 nop +80000180: 00000013 nop +80000184: 00000013 nop +80000188: 00000013 nop +8000018c: 00000013 nop +80000190: 00000013 nop +80000194: 00000013 nop +80000198: 00600e13 li t3,6 +8000019c: 08000213 li tp,128 +800001a0: 30421073 csrw mie,tp +800001a4: 00700e13 li t3,7 +800001a8: 10500073 wfi +800001ac: 00800e13 li t3,8 +800001b0: 00100193 li gp,1 +800001b4: 0041a023 sw tp,0(gp) +800001b8: 00900e13 li t3,9 +800001bc: 00419023 sh tp,0(gp) +800001c0: 00a00e13 li t3,10 +800001c4: 0001a203 lw tp,0(gp) +800001c8: 00b00e13 li t3,11 +800001cc: 00019203 lh tp,0(gp) +800001d0: 00c00e13 li t3,12 +800001d4: 00d00e13 li t3,13 +800001d8: 00002083 lw ra,0(zero) # 0 -800001a4 : -800001a4: 0020006f j 800001a6 -800001a8: 00002083 lw ra,0(zero) # 0 -800001ac: 00e00e13 li t3,14 -800001b0: 20200073 hret -800001b4: 00f00e13 li t3,15 -800001b8: f01000b7 lui ra,0xf0100 -800001bc: f6008093 addi ra,ra,-160 # f00fff60 -800001c0: 0000a103 lw sp,0(ra) -800001c4: 01000e13 li t3,16 -800001c8: 0020a023 sw sp,0(ra) -800001cc: 01100e13 li t3,17 -800001d0: 00008067 ret +800001dc : +800001dc: 0020006f j 800001de +800001e0: 00002083 lw ra,0(zero) # 0 +800001e4: 00e00e13 li t3,14 +800001e8: 20200073 hret +800001ec: 00f00e13 li t3,15 +800001f0: f01000b7 lui ra,0xf0100 +800001f4: f6008093 addi ra,ra,-160 # f00fff60 +800001f8: 0000a103 lw sp,0(ra) +800001fc: 01000e13 li t3,16 +80000200: 0020a023 sw sp,0(ra) +80000204: 01100e13 li t3,17 +80000208: 00008067 ret ... 
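The .hex hunks that follow are the regenerated Intel HEX image of the same program: each record is a ':' followed by a byte count, a 16-bit address, a record type, the data bytes, and a two's-complement checksum over all preceding bytes. A quick way to sanity-check a record (illustrative plain Scala, not part of the repository):

    object HexRecordCheck {
      // True when all bytes of an Intel HEX record, checksum included,
      // sum to zero modulo 256.
      def checksumOk(record: String): Boolean = {
        require(record.startsWith(":"))
        val bytes = record.tail.grouped(2).map(Integer.parseInt(_, 16)).toList
        (bytes.sum & 0xff) == 0
      }

      def main(args: Array[String]): Unit = {
        // Record taken from the machineCsr.hex hunk below.
        println(checksumOk(":10013000130000001300000013000000130000000000000073".take(1) + "100130001300000013000000130000001300000073".drop(0))) // see note
      }
    }

Usage note: `checksumOk(":100130001300000013000000130000001300000073")` returns true for the record shown in the hunk below; a record whose data was edited without recomputing the trailing byte would return false.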
diff --git a/src/test/cpp/raw/machineCsr/build/machineCsr.hex b/src/test/cpp/raw/machineCsr/build/machineCsr.hex index d104c88..d6c33e7 100644 --- a/src/test/cpp/raw/machineCsr/build/machineCsr.hex +++ b/src/test/cpp/raw/machineCsr/build/machineCsr.hex @@ -21,13 +21,17 @@ :100130001300000013000000130000001300000073 :10014000130E5000B70110F0938101F403A20100D7 :1001500083A241001302F23F23A4410023A65100D1 -:10016000130E60001302000873104230130E70006B -:1001700073005010130E80009301100023A0410063 -:10018000130E900023904100130EA00003A2010063 -:10019000130EB00003920100130EC000130ED00026 -:1001A000832000006F00200083200000130EE00079 -:1001B00073002020130EF000B70010F0938000F6BB -:1001C00003A10000130E000123A02000130E100154 -:1001D0006780000000000000000000000000000038 +:100160001300000013000000130000001300000043 +:100170001300000013000000130000001300000033 +:100180001300000013000000130000001300000023 +:100190001300000013000000130E6000130200089B +:1001A00073104230130E700073005010130E800055 +:1001B0009301100023A04100130E900023904100F2 +:1001C000130EA00003A20100130EB0000392010061 +:1001D000130EC000130ED000832000006F0020001B +:1001E00083200000130EE00073002020130EF000A7 +:1001F000B70010F0938000F603A10000130E000179 +:1002000023A02000130E10016780000000000000F2 +:1002100000000000000000000000000000000000DE :0400000580000094E3 :00000001FF diff --git a/src/test/cpp/raw/machineCsr/build/machineCsrCompressed.asm b/src/test/cpp/raw/machineCsr/build/machineCsrCompressed.asm index df9e96f..097f4e3 100644 --- a/src/test/cpp/raw/machineCsr/build/machineCsrCompressed.asm +++ b/src/test/cpp/raw/machineCsr/build/machineCsrCompressed.asm @@ -5,8 +5,7 @@ build/machineCsrCompressed.elf: file format elf32-littleriscv Disassembly of section .crt_section: 80000000 : -80000000: a071 j 8000008c <_start> -80000002: 0001 nop +80000000: 0940006f j 80000094 <_start> 80000004: 00000013 nop 80000008: 00000013 nop 8000000c: 00000013 nop @@ -17,123 +16,140 @@ Disassembly of section .crt_section: 80000020 : 80000020: 34202e73 csrr t3,mcause -80000024: 000e1c63 bnez t3,8000003c +80000024: 000e1e63 bnez t3,80000040 80000028: ffc00f13 li t5,-4 8000002c: 34102ef3 csrr t4,mepc 80000030: 01eefeb3 and t4,t4,t5 -80000034: 0e91 addi t4,t4,4 -80000036: 341e9073 csrw mepc,t4 -8000003a: a821 j 80000052 +80000034: 004e8e93 addi t4,t4,4 +80000038: 341e9073 csrw mepc,t4 +8000003c: 01c0006f j 80000058 -8000003c : -8000003c: 80000eb7 lui t4,0x80000 -80000040: 01de7f33 and t5,t3,t4 -80000044: 000f1763 bnez t5,80000052 -80000048: 34102ef3 csrr t4,mepc -8000004c: 0e91 addi t4,t4,4 -8000004e: 341e9073 csrw mepc,t4 +80000040 : +80000040: 80000eb7 lui t4,0x80000 +80000044: 01de7f33 and t5,t3,t4 +80000048: 000f1863 bnez t5,80000058 +8000004c: 34102ef3 csrr t4,mepc +80000050: 004e8e93 addi t4,t4,4 # 80000004 +80000054: 341e9073 csrw mepc,t4 -80000052 : -80000052: 80000eb7 lui t4,0x80000 -80000056: 003e8e93 addi t4,t4,3 # 80000003 <_start+0xffffff77> -8000005a: 01ce9763 bne t4,t3,80000068 -8000005e: f0013c37 lui s8,0xf0013 -80000062: 4c81 li s9,0 -80000064: 019c2023 sw s9,0(s8) # f0013000 <_start+0x70012f74> +80000058 : +80000058: 80000eb7 lui t4,0x80000 +8000005c: 003e8e93 addi t4,t4,3 # 80000003 +80000060: 01ce9863 bne t4,t3,80000070 +80000064: f0013c37 lui s8,0xf0013 +80000068: 00000c93 li s9,0 +8000006c: 019c2023 sw s9,0(s8) # f0013000 -80000068 : -80000068: 80000eb7 lui t4,0x80000 -8000006c: 007e8e93 addi t4,t4,7 # 80000007 <_start+0xffffff7b> -80000070: 01ce9463 bne t4,t3,80000078 -80000074: 30405073 csrwi mie,0 +80000070 : +80000070: 80000eb7 lui t4,0x80000 
+80000074: 007e8e93 addi t4,t4,7 # 80000007 +80000078: 01ce9463 bne t4,t3,80000080 +8000007c: 30405073 csrwi mie,0 -80000078 : -80000078: 80000eb7 lui t4,0x80000 -8000007c: 00be8e93 addi t4,t4,11 # 8000000b <_start+0xffffff7f> -80000080: 01ce9463 bne t4,t3,80000088 -80000084: 30405073 csrwi mie,0 +80000080 : +80000080: 80000eb7 lui t4,0x80000 +80000084: 00be8e93 addi t4,t4,11 # 8000000b +80000088: 01ce9463 bne t4,t3,80000090 +8000008c: 30405073 csrwi mie,0 -80000088 : -80000088: 30200073 mret +80000090 : +80000090: 30200073 mret -8000008c <_start>: -8000008c: 4e05 li t3,1 -8000008e: 00000073 ecall -80000092: 4e09 li t3,2 -80000094: 42a1 li t0,8 -80000096: 3002a073 csrs mstatus,t0 -8000009a: 42a1 li t0,8 -8000009c: 30429073 csrw mie,t0 -800000a0: f0013c37 lui s8,0xf0013 -800000a4: 4c85 li s9,1 -800000a6: 019c2023 sw s9,0(s8) # f0013000 <_start+0x70012f74> -800000aa: 0001 nop -800000ac: 0001 nop -800000ae: 0001 nop -800000b0: 0001 nop -800000b2: 0001 nop -800000b4: 0001 nop -800000b6: 0001 nop -800000b8: 0001 nop -800000ba: 0001 nop -800000bc: 0001 nop -800000be: 0001 nop -800000c0: 0001 nop -800000c2: 4e0d li t3,3 -800000c4: 08000293 li t0,128 -800000c8: 30429073 csrw mie,t0 -800000cc: 0001 nop -800000ce: 0001 nop -800000d0: 0001 nop -800000d2: 0001 nop -800000d4: 0001 nop -800000d6: 0001 nop -800000d8: 0001 nop -800000da: 4e11 li t3,4 -800000dc: 000012b7 lui t0,0x1 -800000e0: 80028293 addi t0,t0,-2048 # 800 -800000e4: 30429073 csrw mie,t0 -800000e8: 0001 nop -800000ea: 0001 nop -800000ec: 0001 nop -800000ee: 0001 nop -800000f0: 0001 nop -800000f2: 0001 nop -800000f4: 0001 nop -800000f6: 4e15 li t3,5 -800000f8: f01001b7 lui gp,0xf0100 -800000fc: f4018193 addi gp,gp,-192 # f00fff40 <_start+0x700ffeb4> -80000100: 0001a203 lw tp,0(gp) -80000104: 0041a283 lw t0,4(gp) -80000108: 3ff20213 addi tp,tp,1023 # 3ff -8000010c: 0041a423 sw tp,8(gp) -80000110: 0051a623 sw t0,12(gp) -80000114: 4e19 li t3,6 -80000116: 08000213 li tp,128 -8000011a: 30421073 csrw mie,tp -8000011e: 4e1d li t3,7 -80000120: 10500073 wfi -80000124: 4e21 li t3,8 -80000126: 4185 li gp,1 -80000128: 0041a023 sw tp,0(gp) -8000012c: 4e25 li t3,9 -8000012e: 00419023 sh tp,0(gp) -80000132: 4e29 li t3,10 -80000134: 0001a203 lw tp,0(gp) -80000138: 4e2d li t3,11 -8000013a: 00019203 lh tp,0(gp) -8000013e: 4e31 li t3,12 -80000140: 4e35 li t3,13 -80000142: 00002083 lw ra,0(zero) # 0 -80000146: 00002083 lw ra,0(zero) # 0 -8000014a: 4e39 li t3,14 -8000014c: 20200073 hret -80000150: 4e3d li t3,15 -80000152: f01000b7 lui ra,0xf0100 -80000156: f6008093 addi ra,ra,-160 # f00fff60 <_start+0x700ffed4> -8000015a: 0000a103 lw sp,0(ra) -8000015e: 4e41 li t3,16 -80000160: 0020a023 sw sp,0(ra) -80000164: 4e45 li t3,17 -80000166: 8082 ret +80000094 <_start>: +80000094: 00100e13 li t3,1 +80000098: 00000073 ecall +8000009c: 00200e13 li t3,2 +800000a0: 00800293 li t0,8 +800000a4: 3002a073 csrs mstatus,t0 +800000a8: 00800293 li t0,8 +800000ac: 30429073 csrw mie,t0 +800000b0: f0013c37 lui s8,0xf0013 +800000b4: 00100c93 li s9,1 +800000b8: 019c2023 sw s9,0(s8) # f0013000 +800000bc: 00000013 nop +800000c0: 00000013 nop +800000c4: 00000013 nop +800000c8: 00000013 nop +800000cc: 00000013 nop +800000d0: 00000013 nop +800000d4: 00000013 nop +800000d8: 00000013 nop +800000dc: 00000013 nop +800000e0: 00000013 nop +800000e4: 00000013 nop +800000e8: 00000013 nop +800000ec: 00300e13 li t3,3 +800000f0: 08000293 li t0,128 +800000f4: 30429073 csrw mie,t0 +800000f8: 00000013 nop +800000fc: 00000013 nop +80000100: 00000013 nop +80000104: 00000013 nop +80000108: 00000013 nop 
+8000010c: 00000013 nop +80000110: 00000013 nop +80000114: 00400e13 li t3,4 +80000118: 000012b7 lui t0,0x1 +8000011c: 80028293 addi t0,t0,-2048 # 800 +80000120: 30429073 csrw mie,t0 +80000124: 00000013 nop +80000128: 00000013 nop +8000012c: 00000013 nop +80000130: 00000013 nop +80000134: 00000013 nop +80000138: 00000013 nop +8000013c: 00000013 nop +80000140: 00500e13 li t3,5 +80000144: f01001b7 lui gp,0xf0100 +80000148: f4018193 addi gp,gp,-192 # f00fff40 +8000014c: 0001a203 lw tp,0(gp) +80000150: 0041a283 lw t0,4(gp) +80000154: 3ff20213 addi tp,tp,1023 # 3ff +80000158: 0041a423 sw tp,8(gp) +8000015c: 0051a623 sw t0,12(gp) +80000160: 00000013 nop +80000164: 00000013 nop +80000168: 00000013 nop +8000016c: 00000013 nop +80000170: 00000013 nop +80000174: 00000013 nop +80000178: 00000013 nop +8000017c: 00000013 nop +80000180: 00000013 nop +80000184: 00000013 nop +80000188: 00000013 nop +8000018c: 00000013 nop +80000190: 00000013 nop +80000194: 00000013 nop +80000198: 00600e13 li t3,6 +8000019c: 08000213 li tp,128 +800001a0: 30421073 csrw mie,tp +800001a4: 00700e13 li t3,7 +800001a8: 10500073 wfi +800001ac: 00800e13 li t3,8 +800001b0: 00100193 li gp,1 +800001b4: 0041a023 sw tp,0(gp) +800001b8: 00900e13 li t3,9 +800001bc: 00419023 sh tp,0(gp) +800001c0: 00a00e13 li t3,10 +800001c4: 0001a203 lw tp,0(gp) +800001c8: 00b00e13 li t3,11 +800001cc: 00019203 lh tp,0(gp) +800001d0: 00c00e13 li t3,12 +800001d4: 00d00e13 li t3,13 +800001d8: 00002083 lw ra,0(zero) # 0 + +800001dc : +800001dc: 0020006f j 800001de +800001e0: 00002083 lw ra,0(zero) # 0 +800001e4: 00e00e13 li t3,14 +800001e8: 20200073 hret +800001ec: 00f00e13 li t3,15 +800001f0: f01000b7 lui ra,0xf0100 +800001f4: f6008093 addi ra,ra,-160 # f00fff60 +800001f8: 0000a103 lw sp,0(ra) +800001fc: 01000e13 li t3,16 +80000200: 0020a023 sw sp,0(ra) +80000204: 01100e13 li t3,17 +80000208: 00008067 ret ... 
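The compressed variant was rebuilt the same way; the regenerated machineCsrCompressed listing shown here uses only 32-bit encodings (the old a071/0001/4e05 forms become plain 32-bit j/nop/li), so both builds now share the same layout, the same nop window, and the two rebuilt .hex images are identical (both hex diffs end at blob d6c33e7). Continuing the illustration from the plain listing above, the SpinalSim sketch below exercises the toy timer to show the stale-MTIP window that the nop padding is presumably there to cover; the harness, names and delays are assumptions for the example, and the real regression drives the full core through src/test/cpp/regression/main.cpp.

import spinal.core._
import spinal.core.sim._

// Companion simulation sketch for the toy timer above (illustrative only).
object ToyClintTimerSim extends App {
  SimConfig.compile(ToyClintTimer(writeLatency = 3)).doSim { dut =>
    dut.clockDomain.forkStimulus(10)
    dut.io.cmpWriteValid #= false
    dut.io.cmpWriteData  #= 0
    dut.clockDomain.waitSampling(50)     // mtime has passed the compare value: MTIP pending
    assert(dut.io.mtip.toBoolean)

    dut.io.cmpWriteValid #= true         // software pushes mtimecmp into the future
    dut.io.cmpWriteData  #= 4096
    dut.clockDomain.waitSampling()
    dut.io.cmpWriteValid #= false

    dut.clockDomain.waitSampling(10)     // the role played by the added nops
    assert(!dut.io.mtip.toBoolean)       // only now is csrw mie / wfi safe from a stale wake
  }
}
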
diff --git a/src/test/cpp/raw/machineCsr/build/machineCsrCompressed.hex b/src/test/cpp/raw/machineCsr/build/machineCsrCompressed.hex index 1546672..d6c33e7 100644 --- a/src/test/cpp/raw/machineCsr/build/machineCsrCompressed.hex +++ b/src/test/cpp/raw/machineCsr/build/machineCsrCompressed.hex @@ -1,27 +1,37 @@ :0200000480007A -:1000000071A00100130000001300000013000000A5 +:100000006F004009130000001300000013000000FF :100010001300000013000000130000001300000094 -:10002000732E2034631C0E00130FC0FFF32E103408 -:10003000B3FEEE01910E73901E3421A8B70E00801E -:10004000337FDE0163170F00F32E1034910E73908F -:100050001E34B70E0080938E3E006397CE01373C6E -:1000600001F0814C23209C01B70E0080938E7E000E -:100070006394CE0173504030B70E0080938EBE0063 -:100080006394CE017350403073002030054E7300EE -:100090000000094EA14273A00230A1427390423089 -:1000A000373C01F0854C23209C0101000100010038 -:1000B0000100010001000100010001000100010038 -:1000C00001000D4E930200087390423001000100C0 -:1000D00001000100010001000100114EB7120000F3 -:1000E0009382028073904230010001000100010000 -:1000F000010001000100154EB70110F0938101F4D9 -:1001000003A2010083A241001302F23F23A4410095 -:1001100023A65100194E13020008731042301D4EE1 -:1001200073005010214E854123A04100254E23909D -:100130004100294E03A201002D4E03920100314ED1 -:10014000354E8320000083200000394E73002020AC -:100150003D4EB70010F0938000F603A10000414E21 -:1001600023A02000454E8280000000000000000017 -:10017000000000000000000000000000000000007F -:040000058000008CEB +:10002000732E2034631E0E00130FC0FFF32E103406 +:10003000B3FEEE01938E4E0073901E346F00C0012C +:10004000B70E0080337FDE0163180F00F32E1034EB +:10005000938E4E0073901E34B70E0080938E3E0038 +:100060006398CE01373C01F0930C000023209C01E3 +:10007000B70E0080938E7E006394CE0173504030A3 +:10008000B70E0080938EBE006394CE017350403053 +:1000900073002030130E100073000000130E2000B8 +:1000A0009302800073A0023093028000739042306C +:1000B000373C01F0930C100023209C01130000003A +:1000C00013000000130000001300000013000000E4 +:1000D00013000000130000001300000013000000D4 +:1000E000130000001300000013000000130E300086 +:1000F00093020008739042301300000013000000C8 +:1001000013000000130000001300000013000000A3 +:1001100013000000130E4000B7120000938202800B +:100120007390423013000000130000001300000021 +:100130001300000013000000130000001300000073 +:10014000130E5000B70110F0938101F403A20100D7 +:1001500083A241001302F23F23A4410023A65100D1 +:100160001300000013000000130000001300000043 +:100170001300000013000000130000001300000033 +:100180001300000013000000130000001300000023 +:100190001300000013000000130E6000130200089B +:1001A00073104230130E700073005010130E800055 +:1001B0009301100023A04100130E900023904100F2 +:1001C000130EA00003A20100130EB0000392010061 +:1001D000130EC000130ED000832000006F0020001B +:1001E00083200000130EE00073002020130EF000A7 +:1001F000B70010F0938000F603A10000130E000179 +:1002000023A02000130E10016780000000000000F2 +:1002100000000000000000000000000000000000DE +:0400000580000094E3 :00000001FF diff --git a/src/test/cpp/raw/machineCsr/src/crt.S b/src/test/cpp/raw/machineCsr/src/crt.S index bbe966d..91429db 100644 --- a/src/test/cpp/raw/machineCsr/src/crt.S +++ b/src/test/cpp/raw/machineCsr/src/crt.S @@ -102,6 +102,20 @@ _start: addi x4, x4, 1023 sw x4, 8(x3) sw x5, 12(x3) + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop li x28, 6 li x4, 0x080 csrw mie,x4 From 685c914227900f3142556391c9ce48d14ce56f2c Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Tue, 12 May 2020 23:58:28 +0200 Subject: [PATCH 66/91] Add i$ reduceBankWidth to take advantage of multi way by 
remaping the data location to reduce on chip ram data width --- .../scala/vexriscv/ip/InstructionCache.scala | 80 +++++++++++-------- .../vexriscv/TestIndividualFeatures.scala | 4 +- 2 files changed, 50 insertions(+), 34 deletions(-) diff --git a/src/main/scala/vexriscv/ip/InstructionCache.scala b/src/main/scala/vexriscv/ip/InstructionCache.scala index 43f5130..dc97444 100644 --- a/src/main/scala/vexriscv/ip/InstructionCache.scala +++ b/src/main/scala/vexriscv/ip/InstructionCache.scala @@ -24,7 +24,8 @@ case class InstructionCacheConfig( cacheSize : Int, twoCycleRam : Boolean = false, twoCycleRamInnerMux : Boolean = false, preResetFlush : Boolean = false, - bypassGen : Boolean = false ){ + bypassGen : Boolean = false, + reducedBankWidth : Boolean = false){ assert(!(twoCycleRam && !twoCycleCache)) @@ -286,23 +287,13 @@ class InstructionCache(p : InstructionCacheConfig, mmuParameter : MemoryTranslat val lineWidth = bytePerLine*8 val lineCount = cacheSize/bytePerLine - val wordWidth = cpuDataWidth - val wordWidthLog2 = log2Up(wordWidth) - val wordPerLine = lineWidth/wordWidth + val cpuWordWidth = cpuDataWidth val memWordPerLine = lineWidth/memDataWidth - val bytePerWord = wordWidth/8 - val bytePerMemWord = memDataWidth/8 + val bytePerCpuWord = cpuWordWidth/8 val wayLineCount = lineCount/wayCount - val wayLineLog2 = log2Up(wayLineCount) - val wayMemWordCount = wayLineCount * memWordPerLine val tagRange = addressWidth-1 downto log2Up(wayLineCount*bytePerLine) val lineRange = tagRange.low-1 downto log2Up(bytePerLine) - val wordRange = log2Up(bytePerLine)-1 downto log2Up(bytePerWord) - val memWordRange = log2Up(bytePerLine)-1 downto log2Up(bytePerMemWord) - val memWordToCpuWordRange = log2Up(bytePerMemWord)-1 downto log2Up(bytePerWord) - val tagLineRange = tagRange.high downto lineRange.low - val lineWordRange = lineRange.high downto wordRange.low case class LineTag() extends Bundle{ val valid = Bool @@ -310,10 +301,17 @@ class InstructionCache(p : InstructionCacheConfig, mmuParameter : MemoryTranslat val address = UInt(tagRange.length bit) } + val bankCount = wayCount + val bankWidth = if(!reducedBankWidth) memDataWidth else Math.max(cpuDataWidth, memDataWidth/wayCount) + val bankByteSize = cacheSize/bankCount + val bankWordCount = bankByteSize*8/bankWidth + val bankWordToCpuWordRange = log2Up(bankWidth/8)-1 downto log2Up(bytePerCpuWord) + val memToBankRatio = bankWidth*bankCount / memDataWidth + + val banks = Seq.fill(bankCount)(Mem(Bits(bankWidth bits), bankWordCount)) val ways = Seq.fill(wayCount)(new Area{ val tags = Mem(LineTag(),wayLineCount) - val datas = Mem(Bits(memDataWidth bits),wayMemWordCount) if(preResetFlush){ tags.initBigInt(List.fill(wayLineCount)(BigInt(0))) @@ -367,7 +365,7 @@ class InstructionCache(p : InstructionCacheConfig, mmuParameter : MemoryTranslat val write = new Area{ val tag = ways.map(_.tags.writePort) - val data = ways.map(_.datas.writePort) + val data = banks.map(_.writePort) } for(wayId <- 0 until wayCount){ @@ -378,13 +376,24 @@ class InstructionCache(p : InstructionCacheConfig, mmuParameter : MemoryTranslat tag.data.valid := flushCounter.msb tag.data.error := hadError || io.mem.rsp.error tag.data.address := address(tagRange) - - val data = write.data(wayId) - data.valid := io.mem.rsp.valid && wayHit - data.address := address(lineRange) @@ wordIndex - data.data := io.mem.rsp.data } + for((writeBank, bankId) <- write.data.zipWithIndex){ + if(!reducedBankWidth) { + writeBank.valid := io.mem.rsp.valid && wayToAllocate === bankId + writeBank.address := 
address(lineRange) @@ wordIndex + writeBank.data := io.mem.rsp.data + } else { + val sel = U(bankId) - wayToAllocate.value + val groupSel = wayToAllocate(log2Up(bankCount)-1 downto log2Up(bankCount/memToBankRatio)) + val subSel = sel(log2Up(bankCount/memToBankRatio) -1 downto 0) + writeBank.valid := io.mem.rsp.valid && groupSel === (bankId >> log2Up(bankCount/memToBankRatio)) + writeBank.address := address(lineRange) @@ wordIndex @@ (subSel) + writeBank.data := io.mem.rsp.data.subdivideIn(bankCount/memToBankRatio slices)(subSel) + } + } + + when(io.mem.rsp.valid) { wordIndex := (wordIndex + 1).resized hadError.setWhen(io.mem.rsp.error) @@ -394,17 +403,20 @@ class InstructionCache(p : InstructionCacheConfig, mmuParameter : MemoryTranslat } } - val fetchStage = new Area{ val read = new Area{ - val waysValues = for(way <- ways) yield new Area{ + val banksValue = for(bank <- banks) yield new Area{ + val dataMem = bank.readSync(io.cpu.prefetch.pc(lineRange.high downto log2Up(bankWidth/8)), !io.cpu.fetch.isStuck) + val data = if(!twoCycleRamInnerMux) dataMem.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(bankWordToCpuWordRange)) else dataMem + } + + val waysValues = for((way, wayId) <- ways.zipWithIndex) yield new Area{ val tag = if(asyncTagMemory) { way.tags.readAsync(io.cpu.fetch.pc(lineRange)) }else { way.tags.readSync(io.cpu.prefetch.pc(lineRange), !io.cpu.fetch.isStuck) } - val dataMem = way.datas.readSync(io.cpu.prefetch.pc(lineRange.high downto memWordRange.low), !io.cpu.fetch.isStuck) - val data = if(!twoCycleRamInnerMux) dataMem.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(memWordToCpuWordRange)) else dataMem +// val data = CombInit(banksValue(wayId).data) } } @@ -412,10 +424,11 @@ class InstructionCache(p : InstructionCacheConfig, mmuParameter : MemoryTranslat val hit = (!twoCycleRam) generate new Area{ val hits = read.waysValues.map(way => way.tag.valid && way.tag.address === io.cpu.fetch.mmuRsp.physicalAddress(tagRange)) val valid = Cat(hits).orR - val id = OHToUInt(hits) - val error = read.waysValues.map(_.tag.error).read(id) - val data = read.waysValues.map(_.data).read(id) - val word = if(cpuDataWidth == memDataWidth || !twoCycleRamInnerMux) CombInit(data) else data.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(memWordToCpuWordRange)) + val wayId = OHToUInt(hits) + val bankId = if(!reducedBankWidth) wayId else (wayId >> log2Up(bankCount/memToBankRatio)) @@ ((wayId + (io.cpu.fetch.mmuRsp.physicalAddress(log2Up(bankWidth/8), log2Up(bankCount) bits))).resize(log2Up(bankCount/memToBankRatio))) + val error = read.waysValues.map(_.tag.error).read(wayId) + val data = read.banksValue.map(_.data).read(bankId) + val word = if(cpuDataWidth == memDataWidth || !twoCycleRamInnerMux) CombInit(data) else data.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(bankWordToCpuWordRange)) io.cpu.fetch.data := (if(p.bypassGen) (io.cpu.fetch.dataBypassValid ? 
io.cpu.fetch.dataBypass | word) else word) if(twoCycleCache){ io.cpu.decode.data := RegNextWhen(io.cpu.fetch.data,!io.cpu.decode.isStuck) @@ -423,7 +436,7 @@ class InstructionCache(p : InstructionCacheConfig, mmuParameter : MemoryTranslat } if(twoCycleRam && wayCount == 1){ - val cacheData = if(cpuDataWidth == memDataWidth || !twoCycleRamInnerMux) CombInit(read.waysValues.head.data) else read.waysValues.head.data.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(memWordToCpuWordRange)) + val cacheData = if(cpuDataWidth == memDataWidth || !twoCycleRamInnerMux) CombInit(read.banksValue.head.data) else read.banksValue.head.data.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(bankWordToCpuWordRange)) io.cpu.fetch.data := (if(p.bypassGen) (io.cpu.fetch.dataBypassValid ? io.cpu.fetch.dataBypass | cacheData) else cacheData) } @@ -452,10 +465,11 @@ class InstructionCache(p : InstructionCacheConfig, mmuParameter : MemoryTranslat val tags = fetchStage.read.waysValues.map(way => stage(way.tag)) val hits = tags.map(tag => tag.valid && tag.address === mmuRsp.physicalAddress(tagRange)) val valid = Cat(hits).orR - val id = OHToUInt(hits) - val error = tags(id).error - val data = fetchStage.read.waysValues.map(way => stage(way.data)).read(id) - val word = if(cpuDataWidth == memDataWidth || !twoCycleRamInnerMux) data else data.subdivideIn(cpuDataWidth bits).read(io.cpu.decode.pc(memWordToCpuWordRange)) + val wayId = OHToUInt(hits) + val bankId = if(!reducedBankWidth) wayId else (wayId >> log2Up(bankCount/memToBankRatio)) @@ ((wayId + (mmuRsp.physicalAddress(log2Up(bankWidth/8), log2Up(bankCount) bits))).resize(log2Up(bankCount/memToBankRatio))) + val error = tags(wayId).error + val data = fetchStage.read.banksValue.map(bank => stage(bank.data)).read(bankId) + val word = if(cpuDataWidth == memDataWidth || !twoCycleRamInnerMux) data else data.subdivideIn(cpuDataWidth bits).read(io.cpu.decode.pc(bankWordToCpuWordRange)) if(p.bypassGen) when(stage(io.cpu.fetch.dataBypassValid)){ word := stage(io.cpu.fetch.dataBypass) } diff --git a/src/test/scala/vexriscv/TestIndividualFeatures.scala b/src/test/scala/vexriscv/TestIndividualFeatures.scala index 66b308b..f3f5b28 100644 --- a/src/test/scala/vexriscv/TestIndividualFeatures.scala +++ b/src/test/scala/vexriscv/TestIndividualFeatures.scala @@ -356,6 +356,7 @@ class IBusDimension(rvcRate : Double) extends VexRiscvDimension("IBus") { val catchAll = universes.contains(VexRiscvUniverse.CATCH_ALL) val compressed = r.nextDouble() < rvcRate val tighlyCoupled = r.nextBoolean() && !catchAll + val reducedBankWidth = r.nextBoolean() // val tighlyCoupled = false val prediction = random(r, List(NONE, STATIC, DYNAMIC, DYNAMIC_TARGET)) val relaxedPcCalculation, twoCycleCache, injectorStage = r.nextBoolean() @@ -392,7 +393,8 @@ class IBusDimension(rvcRate : Double) extends VexRiscvDimension("IBus") { asyncTagMemory = false, twoCycleRam = twoCycleRam, twoCycleCache = twoCycleCache, - twoCycleRamInnerMux = twoCycleRamInnerMux + twoCycleRamInnerMux = twoCycleRamInnerMux, + reducedBankWidth = reducedBankWidth ) ) if(tighlyCoupled) p.newTightlyCoupledPort(TightlyCoupledPortParameter("iBusTc", a => a(30 downto 28) === 0x0)) From 42fef8bbcdbb5a9058ee141ef80f5a8a46d4330e Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Tue, 12 May 2020 23:59:19 +0200 Subject: [PATCH 67/91] Smp cluster now use i$ reduceBankWidth --- src/main/scala/vexriscv/TestsWorkspace.scala | 9 ++-- .../demo/smp/VexRiscvSmpCluster.scala | 45 ++----------------- .../demo/smp/VexRiscvSmpLitexCluster.scala | 4 +- 3 files 
changed, 11 insertions(+), 47 deletions(-) diff --git a/src/main/scala/vexriscv/TestsWorkspace.scala b/src/main/scala/vexriscv/TestsWorkspace.scala index 8a78544..af1c77b 100644 --- a/src/main/scala/vexriscv/TestsWorkspace.scala +++ b/src/main/scala/vexriscv/TestsWorkspace.scala @@ -60,9 +60,9 @@ object TestsWorkspace { prediction = STATIC, injectorStage = false, config = InstructionCacheConfig( - cacheSize = 4096*1, + cacheSize = 4096*2, bytePerLine = 64, - wayCount = 1, + wayCount = 2, addressWidth = 32, cpuDataWidth = 32, memDataWidth = 128, @@ -70,7 +70,8 @@ object TestsWorkspace { catchAccessFault = true, asyncTagMemory = false, twoCycleRam = true, - twoCycleCache = true + twoCycleCache = true, + reducedBankWidth = true // ) ), memoryTranslatorPortConfig = MmuPortConfig( @@ -129,7 +130,7 @@ object TestsWorkspace { catchIllegalInstruction = true ), new RegFilePlugin( - regFileReadyKind = plugin.SYNC, + regFileReadyKind = plugin.ASYNC, zeroBoot = true ), new IntAluPlugin, diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala index b960643..0a8c3ec 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -120,22 +120,6 @@ object VexRiscvSmpClusterGen { new MmuPlugin( ioRange = ioRange ), - //Uncomment the whole IBusSimplePlugin and comment IBusCachedPlugin if you want uncached iBus config - // new IBusSimplePlugin( - // resetVector = 0x80000000l, - // cmdForkOnSecondStage = false, - // cmdForkPersistence = false, - // prediction = DYNAMIC_TARGET, - // historyRamSizeLog2 = 10, - // catchAccessFault = true, - // compressedGen = true, - // busLatencyMin = 1, - // injectorStage = true, - // memoryTranslatorPortConfig = withMmu generate MmuPortConfig( - // portTlbSize = 4 - // ) - // ), - //Uncomment the whole IBusCachedPlugin and comment IBusSimplePlugin if you want cached iBus config new IBusCachedPlugin( resetVector = resetVector, @@ -146,9 +130,9 @@ object VexRiscvSmpClusterGen { injectorStage = false, relaxedPcCalculation = true, config = InstructionCacheConfig( - cacheSize = 4096*1, + cacheSize = 4096*2, bytePerLine = 64, - wayCount = 1, + wayCount = 2, addressWidth = 32, cpuDataWidth = 32, memDataWidth = 128, @@ -156,8 +140,8 @@ object VexRiscvSmpClusterGen { catchAccessFault = true, asyncTagMemory = false, twoCycleRam = false, - twoCycleCache = true - // ) + twoCycleCache = true, + reducedBankWidth = true ), memoryTranslatorPortConfig = MmuPortConfig( portTlbSize = 4, @@ -166,16 +150,6 @@ object VexRiscvSmpClusterGen { earlyCacheHits = true ) ), - // ).newTightlyCoupledPort(TightlyCoupledPortParameter("iBusTc", a => a(30 downto 28) === 0x0 && a(5))), - // new DBusSimplePlugin( - // catchAddressMisaligned = true, - // catchAccessFault = true, - // earlyInjection = false, - // withLrSc = true, - // memoryTranslatorPortConfig = withMmu generate MmuPortConfig( - // portTlbSize = 4 - // ) - // ), new DBusCachedPlugin( dBusCmdMasterPipe = true, dBusCmdSlavePipe = true, @@ -204,13 +178,6 @@ object VexRiscvSmpClusterGen { earlyCacheHits = true ) ), - - // new MemoryTranslatorPlugin( - // tlbSize = 32, - // virtualRange = _(31 downto 28) === 0xC, - // ioRange = _(31 downto 28) === 0xF - // ), - new DecoderSimplePlugin( catchIllegalInstruction = true ), @@ -234,8 +201,6 @@ object VexRiscvSmpClusterGen { pessimisticWriteRegFile = false, pessimisticAddressMatch = false ), - // new HazardSimplePlugin(false, true, false, true), - // new 
HazardSimplePlugin(false, false, false, false), new MulPlugin, new MulDivIterativePlugin( genMul = false, @@ -243,9 +208,7 @@ object VexRiscvSmpClusterGen { mulUnrollFactor = 32, divUnrollFactor = 1 ), - // new DivPlugin, new CsrPlugin(CsrPluginConfig.openSbi(hartId = hartId, misa = Riscv.misaToInt("imas"))), - new BranchPlugin( earlyBranch = false, catchAddressMisaligned = true, diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala index 9bfca0b..4bf62e9 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala @@ -350,8 +350,8 @@ object VexRiscvLitexSmpClusterGen extends App { debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")) ) -// SpinalVerilog(Bench.compressIo(dutGen)) - SpinalVerilog(dutGen) + SpinalVerilog(Bench.compressIo(dutGen)) +// SpinalVerilog(dutGen) } From cf60989ae1662a3f9ce0cf0a5226f26d377fb557 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Thu, 14 May 2020 00:05:54 +0200 Subject: [PATCH 68/91] Litex smp cluster now blackboxify d$ data ram --- .../scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala index 4bf62e9..03ccc89 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala @@ -329,7 +329,6 @@ case class VexRiscvLitexSmpCluster(p : VexRiscvLitexSmpClusterParameter, object VexRiscvLitexSmpClusterGen extends App { val cpuCount = 4 - val withStall = false def parameter = VexRiscvLitexSmpClusterParameter( cluster = VexRiscvSmpClusterParameter( @@ -350,8 +349,9 @@ object VexRiscvLitexSmpClusterGen extends App { debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")) ) - SpinalVerilog(Bench.compressIo(dutGen)) -// SpinalVerilog(dutGen) + val genConfig = SpinalConfig().addStandardMemBlackboxing(blackboxByteEnables) +// genConfig.generateVerilog(Bench.compressIo(dutGen)) + genConfig.generateVerilog(dutGen) } From 380afa3130463fe214944366617632ebfb3e01ff Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Wed, 20 May 2020 13:45:52 +0200 Subject: [PATCH 69/91] SpinalHDL 1.4.2 --- build.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sbt b/build.sbt index c1510b2..5fba42b 100644 --- a/build.sbt +++ b/build.sbt @@ -5,7 +5,7 @@ lazy val root = (project in file(".")). 
scalaVersion := "2.11.12", version := "2.0.0" )), - scalacOptions += s"-Xplugin:${new File(baseDirectory.value + "/../SpinalHDL/idslplugin/target/scala-2.11/spinalhdl-idsl-plugin_2.11-1.4.1.jar")}", + scalacOptions += s"-Xplugin:${new File(baseDirectory.value + "/../SpinalHDL/idslplugin/target/scala-2.11/spinalhdl-idsl-plugin_2.11-1.4.2.jar")}", scalacOptions += s"-Xplugin-require:idsl-plugin", libraryDependencies ++= Seq( // "com.github.spinalhdl" % "spinalhdl-core_2.11" % "1.3.6", From a64fd9cf3b9f03b8b1e313c80ab744e4dace15a3 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Wed, 20 May 2020 13:49:10 +0200 Subject: [PATCH 70/91] Add CsrPlugin external hartid d$ rsp/sync now decrement pendings by signal amount --- .../demo/smp/VexRiscvSmpCluster.scala | 27 +-- .../demo/smp/VexRiscvSmpLitexCluster.scala | 99 +++++----- src/main/scala/vexriscv/ip/DataCache.scala | 174 ++++++++++++++---- .../scala/vexriscv/plugin/CsrPlugin.scala | 14 +- .../vexriscv/plugin/DBusCachedPlugin.scala | 1 + 5 files changed, 222 insertions(+), 93 deletions(-) diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala index 0a8c3ec..38506f2 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -54,8 +54,9 @@ case class VexRiscvSmpCluster(p : VexRiscvSmpClusterParameter, val core = new VexRiscv(cpuConfig) core.plugins.foreach { case plugin: IBusCachedPlugin => iBus = plugin.iBus.toBmb() - case plugin: DBusCachedPlugin => dBus = plugin.dBus.toBmb() + case plugin: DBusCachedPlugin => dBus = plugin.dBus.toBmb().pipelined(cmdValid = true) case plugin: CsrPlugin => { + plugin.externalMhartId := cpuId plugin.softwareInterrupt := io.softwareInterrupts(cpuId) plugin.externalInterrupt := io.externalInterrupts(cpuId) plugin.timerInterrupt := io.timerInterrupts(cpuId) @@ -112,9 +113,12 @@ case class VexRiscvSmpCluster(p : VexRiscvSmpClusterParameter, object VexRiscvSmpClusterGen { - def vexRiscvConfig(hartId : Int, + def vexRiscvConfig(hartIdWidth : Int, + hartId : Int, ioRange : UInt => Bool = (x => x(31 downto 28) === 0xF), resetVector : Long = 0x80000000l) = { + val iBusWidth = 128 + val dBusWidth = 64 val config = VexRiscvConfig( plugins = List( new MmuPlugin( @@ -135,7 +139,7 @@ object VexRiscvSmpClusterGen { wayCount = 2, addressWidth = 32, cpuDataWidth = 32, - memDataWidth = 128, + memDataWidth = iBusWidth, catchIllegalAccess = true, catchAccessFault = true, asyncTagMemory = false, @@ -151,7 +155,7 @@ object VexRiscvSmpClusterGen { ) ), new DBusCachedPlugin( - dBusCmdMasterPipe = true, + dBusCmdMasterPipe = dBusWidth == 32, dBusCmdSlavePipe = true, dBusRspSlavePipe = true, relaxedMemoryTranslationRegister = true, @@ -161,14 +165,15 @@ object VexRiscvSmpClusterGen { wayCount = 1, addressWidth = 32, cpuDataWidth = 32, - memDataWidth = 32, + memDataWidth = dBusWidth, catchAccessError = true, catchIllegal = true, catchUnaligned = true, withLrSc = true, withAmo = true, withExclusive = true, - withInvalidate = true + withInvalidate = true, + aggregationWidth = if(dBusWidth == 32) 0 else log2Up(dBusWidth/8) // ) ), memoryTranslatorPortConfig = MmuPortConfig( @@ -208,7 +213,7 @@ object VexRiscvSmpClusterGen { mulUnrollFactor = 32, divUnrollFactor = 1 ), - new CsrPlugin(CsrPluginConfig.openSbi(hartId = hartId, misa = Riscv.misaToInt("imas"))), + new CsrPlugin(CsrPluginConfig.openSbi(misa = Riscv.misaToInt("imas")).copy(withExternalMhartid = true, mhartidWidth = hartIdWidth)), new 
BranchPlugin( earlyBranch = false, catchAddressMisaligned = true, @@ -224,7 +229,7 @@ object VexRiscvSmpClusterGen { debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")), p = VexRiscvSmpClusterParameter( cpuConfigs = List.tabulate(cpuCount) { - vexRiscvConfig(_, resetVector = resetVector) + vexRiscvConfig(log2Up(cpuCount), _, resetVector = resetVector) } ) ) @@ -462,7 +467,7 @@ object VexRiscvSmpClusterOpenSbi extends App{ simConfig.allOptimisation simConfig.addSimulatorFlag("--threads 1") - val cpuCount = 1 + val cpuCount = 4 val withStall = false def gen = { @@ -573,8 +578,8 @@ object VexRiscvSmpClusterOpenSbi extends App{ // fork{ // disableSimWave() -// val atMs = 130 -// val durationMs = 15 +// val atMs = 3790 +// val durationMs = 5 // sleep(atMs*1000000) // enableSimWave() // println("** enableSimWave **") diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala index 03ccc89..3f3047f 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala @@ -73,13 +73,13 @@ case class LiteDramNative(p : LiteDramNativeParameter) extends Bundle with IMast } var writeCmdCounter, writeDataCounter = 0 - StreamReadyRandomizer(bus.cmd, cd) + StreamReadyRandomizer(bus.cmd, cd).factor = 0.5f StreamMonitor(bus.cmd, cd) { t => cmdQueue.enqueue(Cmd(t.addr.toLong * (p.dataWidth/8) , t.we.toBoolean)) if(t.we.toBoolean) writeCmdCounter += 1 } - StreamReadyRandomizer(bus.wdata, cd) + StreamReadyRandomizer(bus.wdata, cd).factor = 0.5f StreamMonitor(bus.wdata, cd) { p => writeDataCounter += 1 // if(p.data.toBigInt == BigInt("00000002000000020000000200000002",16)){ @@ -175,16 +175,19 @@ case class BmbToLiteDram(bmbParameter : BmbParameter, val halt = Bool() val (cmdFork, dataFork) = StreamFork2(unburstified.cmd.haltWhen(halt)) - io.output.cmd.arbitrationFrom(cmdFork.haltWhen(pendingRead.msb)) - io.output.cmd.addr := (cmdFork.address >> log2Up(liteDramParameter.dataWidth/8)).resized - io.output.cmd.we := cmdFork.isWrite + val outputCmd = Stream(LiteDramNativeCmd(liteDramParameter)) + outputCmd.arbitrationFrom(cmdFork.haltWhen(pendingRead.msb)) + outputCmd.addr := (cmdFork.address >> log2Up(liteDramParameter.dataWidth/8)).resized + outputCmd.we := cmdFork.isWrite + + io.output.cmd <-< outputCmd if(bmbParameter.canWrite) { val wData = Stream(LiteDramNativeWData(liteDramParameter)) wData.arbitrationFrom(dataFork.throwWhen(dataFork.isRead)) wData.data := dataFork.data wData.we := dataFork.mask - io.output.wdata << wData.queue(wdataFifoSize) + io.output.wdata << wData.queueLowLatency(wdataFifoSize, latency = 1) //TODO queue low latency } else { dataFork.ready := True io.output.wdata.valid := False @@ -212,7 +215,7 @@ case class BmbToLiteDram(bmbParameter : BmbParameter, unburstified.rsp.data := rdataFifo.data - pendingRead := pendingRead + U(io.output.cmd.fire && !io.output.cmd.we) - U(rdataFifo.fire) + pendingRead := pendingRead + U(outputCmd.fire && !outputCmd.we) - U(rdataFifo.fire) } object BmbToLiteDramTester extends App{ @@ -241,6 +244,7 @@ case class VexRiscvLitexSmpClusterParameter( cluster : VexRiscvSmpClusterParamet liteDram : LiteDramNativeParameter, liteDramMapping : AddressMapping) +//addAttribute("""mark_debug = "true"""") case class VexRiscvLitexSmpCluster(p : VexRiscvLitexSmpClusterParameter, debugClockDomain : ClockDomain) extends Component{ @@ -308,50 +312,59 @@ case class VexRiscvLitexSmpCluster(p : 
VexRiscvLitexSmpClusterParameter, iBusDecoder.io.input << iBusArbiter.io.output.pipelined(cmdValid = true) val iMem = LiteDramNative(p.liteDram) - val iMemBridge = iMem.fromBmb(iBusDecoder.io.outputs(1), wdataFifoSize = 0, rdataFifoSize = 32) - iMem.cmd >-> io.iMem.cmd - iMem.wdata >> io.iMem.wdata - iMem.rdata << io.iMem.rdata + io.iMem.fromBmb(iBusDecoder.io.outputs(1), wdataFifoSize = 0, rdataFifoSize = 32) + val iBusDecoderToPeripheral = iBusDecoder.io.outputs(0).resize(dataWidth = 32).pipelined(cmdHalfRate = true, rspValid = true) + val dBusDecoderToPeripheral = dBusDecoder.io.outputs(0).resize(dataWidth = 32).pipelined(cmdHalfRate = true, rspValid = true) + val peripheralAccessLength = Math.max(iBusDecoder.io.outputs(0).p.lengthWidth, dBusDecoder.io.outputs(0).p.lengthWidth) val peripheralArbiter = BmbArbiter( - p = dBusDecoder.io.outputs(0).p.copy(sourceWidth = dBusDecoder.io.outputs(0).p.sourceWidth + 1, lengthWidth = peripheralAccessLength), + p = dBusDecoder.io.outputs(0).p.copy( + sourceWidth = List(iBusDecoderToPeripheral, dBusDecoderToPeripheral).map(_.p.sourceWidth).max + 1, + contextWidth = List(iBusDecoderToPeripheral, dBusDecoderToPeripheral).map(_.p.contextWidth).max, + lengthWidth = peripheralAccessLength, + dataWidth = 32 + ), portCount = 2, lowerFirstPriority = true ) - peripheralArbiter.io.inputs(0) << iBusDecoder.io.outputs(0).resize(dataWidth = 32).pipelined(cmdHalfRate = true, rspValid = true) - peripheralArbiter.io.inputs(1) << dBusDecoder.io.outputs(0).resize(dataWidth = 32).pipelined(cmdHalfRate = true, rspValid = true) + peripheralArbiter.io.inputs(0) << iBusDecoderToPeripheral + peripheralArbiter.io.inputs(1) << dBusDecoderToPeripheral val peripheralWishbone = peripheralArbiter.io.output.pipelined(cmdValid = true).toWishbone() io.peripheral << peripheralWishbone } object VexRiscvLitexSmpClusterGen extends App { - val cpuCount = 4 + for(cpuCount <- List(1,2,4,8)) { + def parameter = VexRiscvLitexSmpClusterParameter( + cluster = VexRiscvSmpClusterParameter( + cpuConfigs = List.tabulate(cpuCount) { hartId => + vexRiscvConfig( + hartIdWidth = log2Up(cpuCount), + hartId = hartId, + ioRange = address => address.msb, + resetVector = 0 + ) + } + ), + liteDram = LiteDramNativeParameter(addressWidth = 32, dataWidth = 128), + liteDramMapping = SizeMapping(0x40000000l, 0x40000000l) + ) - def parameter = VexRiscvLitexSmpClusterParameter( - cluster = VexRiscvSmpClusterParameter( - cpuConfigs = List.tabulate(cpuCount) { hartId => - vexRiscvConfig( - hartId = hartId, - ioRange = address => address.msb, - resetVector = 0 - ) - } - ), - liteDram = LiteDramNativeParameter(addressWidth = 32, dataWidth = 128), - liteDramMapping = SizeMapping(0x40000000l, 0x40000000l) - ) + def dutGen = { + val toplevel = VexRiscvLitexSmpCluster( + p = parameter, + debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")) + ) + toplevel + } - def dutGen = VexRiscvLitexSmpCluster( - p = parameter, - debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")) - ) - - val genConfig = SpinalConfig().addStandardMemBlackboxing(blackboxByteEnables) -// genConfig.generateVerilog(Bench.compressIo(dutGen)) - genConfig.generateVerilog(dutGen) + val genConfig = SpinalConfig().addStandardMemBlackboxing(blackboxByteEnables) + // genConfig.generateVerilog(Bench.compressIo(dutGen)) + genConfig.generateVerilog(dutGen.setDefinitionName(s"VexRiscvLitexSmpCluster_${cpuCount}c")) + } } @@ -363,13 +376,13 @@ object VexRiscvLitexSmpClusterOpenSbi extends App{ 
simConfig.withWave simConfig.allOptimisation - val cpuCount = 4 - val withStall = false + val cpuCount = 8 def parameter = VexRiscvLitexSmpClusterParameter( cluster = VexRiscvSmpClusterParameter( cpuConfigs = List.tabulate(cpuCount) { hartId => vexRiscvConfig( + hartIdWidth = log2Up(cpuCount), hartId = hartId, ioRange = address => address(31 downto 28) === 0xF, resetVector = 0x80000000l @@ -440,12 +453,12 @@ object VexRiscvLitexSmpClusterOpenSbi extends App{ // fork{ // disableSimWave() -// val atMs = 8 -// val durationMs = 3 -// sleep(atMs*1000000) +// val atMs = 3790 +// val durationMs = 5 +// sleep(atMs*1000000l) // enableSimWave() // println("** enableSimWave **") -// sleep(durationMs*1000000) +// sleep(durationMs*1000000l) // println("** disableSimWave **") // while(true) { // disableSimWave() @@ -453,7 +466,7 @@ object VexRiscvLitexSmpClusterOpenSbi extends App{ // enableSimWave() // sleep( 100 * 10) // } -// // simSuccess() +// // simSuccess() // } fork{ diff --git a/src/main/scala/vexriscv/ip/DataCache.scala b/src/main/scala/vexriscv/ip/DataCache.scala index 2f2e8c2..82fa3af 100644 --- a/src/main/scala/vexriscv/ip/DataCache.scala +++ b/src/main/scala/vexriscv/ip/DataCache.scala @@ -5,7 +5,7 @@ import spinal.core._ import spinal.lib._ import spinal.lib.bus.amba4.axi.{Axi4Config, Axi4Shared} import spinal.lib.bus.avalon.{AvalonMM, AvalonMMConfig} -import spinal.lib.bus.bmb.{Bmb, BmbParameter} +import spinal.lib.bus.bmb.{Bmb, BmbCmd, BmbParameter} import spinal.lib.bus.wishbone.{Wishbone, WishboneConfig} import spinal.lib.bus.simple._ import vexriscv.plugin.DBusSimpleBus @@ -29,7 +29,8 @@ case class DataCacheConfig(cacheSize : Int, withInvalidate : Boolean = false, pendingMax : Int = 32, directTlbHit : Boolean = false, - mergeExecuteMemory : Boolean = false){ + mergeExecuteMemory : Boolean = false, + aggregationWidth : Int = 0){ assert(!(mergeExecuteMemory && (earlyDataMux || earlyWaysHits))) assert(!(earlyDataMux && !earlyWaysHits)) assert(isPow2(pendingMax)) @@ -41,6 +42,8 @@ case class DataCacheConfig(cacheSize : Int, def withInternalLrSc = withLrSc && !withExclusive def withExternalLrSc = withLrSc && withExclusive def withExternalAmo = withAmo && withExclusive + def cpuDataBytes = cpuDataWidth/8 + def memDataBytes = memDataWidth/8 def getAxi4SharedConfig() = Axi4Config( addressWidth = addressWidth, dataWidth = memDataWidth, @@ -79,10 +82,10 @@ case class DataCacheConfig(cacheSize : Int, def getBmbParameter() = BmbParameter( addressWidth = 32, - dataWidth = 32, + dataWidth = memDataWidth, lengthWidth = log2Up(this.bytePerLine), sourceWidth = 0, - contextWidth = if(!withWriteResponse) 1 else 0, + contextWidth = (if(!withWriteResponse) 1 else 0) + (if(cpuDataWidth != memDataWidth) log2Up(memDataBytes) else 0), canRead = true, canWrite = true, alignment = BmbParameter.BurstAlignement.LENGTH, @@ -203,6 +206,7 @@ case class DataCacheMemCmd(p : DataCacheConfig) extends Bundle{ val last = Bool } case class DataCacheMemRsp(p : DataCacheConfig) extends Bundle{ + val aggregated = UInt(p.aggregationWidth bits) val last = Bool() val data = Bits(p.memDataWidth bit) val error = Bool @@ -217,7 +221,7 @@ case class DataCacheAck(p : DataCacheConfig) extends Bundle{ } case class DataCacheSync(p : DataCacheConfig) extends Bundle{ - + val aggregated = UInt(p.aggregationWidth bits) } case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave{ @@ -369,21 +373,133 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave } - def toBmb() : Bmb = { + def 
toBmb(syncPendingMax : Int = 16, + timeoutCycles : Int = 16) : Bmb = new Area{ + setCompositeName(DataCacheMemBus.this, "Bridge", true) val pipelinedMemoryBusConfig = p.getBmbParameter() val bus = Bmb(pipelinedMemoryBusConfig).setCompositeName(this,"toBmb", true) + val aggregationMax = p.memDataBytes - bus.cmd.valid := cmd.valid - bus.cmd.last := cmd.last - if(!p.withWriteResponse) bus.cmd.context(0) := cmd.wr - bus.cmd.opcode := (cmd.wr ? B(Bmb.Cmd.Opcode.WRITE) | B(Bmb.Cmd.Opcode.READ)) - bus.cmd.address := cmd.address.resized - bus.cmd.data := cmd.data - bus.cmd.length := (cmd.length << 2) | 3 //TODO better sub word access - bus.cmd.mask := cmd.mask - if(p.withExclusive) bus.cmd.exclusive := cmd.exclusive + case class Context() extends Bundle{ + val isWrite = !p.withWriteResponse generate Bool() + val rspCount = (p.cpuDataWidth != p.memDataWidth) generate UInt(log2Up(aggregationMax) bits) + } + + val withoutWriteBuffer = if(p.cpuDataWidth == p.memDataWidth) new Area { + val busCmdContext = Context() + + bus.cmd.valid := cmd.valid + bus.cmd.last := cmd.last + bus.cmd.opcode := (cmd.wr ? B(Bmb.Cmd.Opcode.WRITE) | B(Bmb.Cmd.Opcode.READ)) + bus.cmd.address := cmd.address.resized + bus.cmd.data := cmd.data + bus.cmd.length := (cmd.length << 2) | 3 + bus.cmd.mask := cmd.mask + if (p.withExclusive) bus.cmd.exclusive := cmd.exclusive + if (!p.withWriteResponse) busCmdContext.isWrite := cmd.wr + bus.cmd.context := B(busCmdContext) + + cmd.ready := bus.cmd.ready + if(p.withInvalidate) sync.arbitrationFrom(bus.sync) + } + + val withWriteBuffer = if(p.cpuDataWidth != p.memDataWidth) new Area { + val buffer = new Area { + val stream = cmd.toEvent().m2sPipe() + val address = Reg(UInt(p.addressWidth bits)) + val length = Reg(UInt(pipelinedMemoryBusConfig.lengthWidth bits)) + val write = Reg(Bool) + val exclusive = Reg(Bool) + val data = Reg(Bits(p.memDataWidth bits)) + val mask = Reg(Bits(p.memDataWidth/8 bits)) init(0) + } + + val aggregationRange = log2Up(p.memDataWidth/8)-1 downto log2Up(p.cpuDataWidth/8) + val tagRange = p.addressWidth-1 downto aggregationRange.high+1 + val aggregationEnabled = Reg(Bool) + val aggregationCounter = Reg(UInt(log2Up(aggregationMax) bits)) init(0) + val aggregationCounterFull = aggregationCounter === aggregationCounter.maxValue + val timer = Reg(UInt(log2Up(timeoutCycles)+1 bits)) init(0) + val timerFull = timer.msb + val hit = cmd.address(tagRange) === buffer.address(tagRange) + val canAggregate = cmd.valid && cmd.wr && !cmd.uncached && !cmd.exclusive && !timerFull && !aggregationCounterFull && (!buffer.stream.valid || aggregationEnabled && hit) + val doFlush = cmd.valid && !canAggregate || timerFull || aggregationCounterFull || !aggregationEnabled +// val canAggregate = False +// val doFlush = True + val busCmdContext = Context() + val halt = False + + when(cmd.fire){ + aggregationCounter := aggregationCounter + 1 + } + when(buffer.stream.valid && !timerFull){ + timer := timer + 1 + } + when(bus.cmd.fire || !buffer.stream.valid){ + buffer.mask := 0 + aggregationCounter := 0 + timer := 0 + } + + buffer.stream.ready := (bus.cmd.ready && doFlush || canAggregate) && !halt + bus.cmd.valid := buffer.stream.valid && doFlush && !halt + bus.cmd.last := True + bus.cmd.opcode := (buffer.write ? 
B(Bmb.Cmd.Opcode.WRITE) | B(Bmb.Cmd.Opcode.READ)) + bus.cmd.address := buffer.address + bus.cmd.length := buffer.length + bus.cmd.data := buffer.data + bus.cmd.mask := buffer.mask + + if (p.withExclusive) bus.cmd.exclusive := buffer.exclusive + bus.cmd.context.removeAssignments() := B(busCmdContext) + if (!p.withWriteResponse) busCmdContext.isWrite := bus.cmd.isWrite + busCmdContext.rspCount := aggregationCounter + + val aggregationSel = cmd.address(aggregationRange) + when(cmd.fire){ + val dIn = cmd.data.subdivideIn(8 bits) + val dReg = buffer.data.subdivideIn(8 bits) + for(byteId <- 0 until p.memDataBytes){ + when(aggregationSel === byteId / p.cpuDataBytes && cmd.mask(byteId % p.cpuDataBytes)){ + dReg.write(byteId, dIn(byteId % p.cpuDataBytes)) + buffer.mask(byteId) := True + } + } + } + + when(cmd.fire){ + buffer.write := cmd.wr + buffer.address := cmd.address.resized + buffer.length := (cmd.length << 2) | 3 + if (p.withExclusive) buffer.exclusive := cmd.exclusive + + when(cmd.wr && !cmd.uncached && !cmd.exclusive){ + aggregationEnabled := True + buffer.address(aggregationRange.high downto 0) := 0 + buffer.length := p.memDataBytes-1 + } otherwise { + aggregationEnabled := False + } + } + + + val rspCtx = bus.rsp.context.as(Context()) + rsp.aggregated := rspCtx.rspCount + + val syncLogic = p.withInvalidate generate new Area{ + val cmdCtx = Stream(UInt(log2Up(aggregationMax) bits)) + cmdCtx.valid := bus.cmd.fire && bus.cmd.isWrite + cmdCtx.payload := aggregationCounter + halt setWhen(!cmdCtx.ready) + + val syncCtx = cmdCtx.queueLowLatency(syncPendingMax, latency = 1) + syncCtx.ready := bus.sync.fire + + sync.arbitrationFrom(bus.sync) + sync.aggregated := syncCtx.payload + } + } - cmd.ready := bus.cmd.ready rsp.valid := bus.rsp.valid if(!p.withWriteResponse) rsp.valid clearWhen(bus.rsp.context(0)) @@ -399,21 +515,9 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave inv.enable := bus.inv.all bus.ack.arbitrationFrom(ack) - - sync.arbitrationFrom(bus.sync) - -// bus.ack.arbitrationFrom(ack) -// //TODO manage lenght ? -// inv.address := bus.inv.address -//// inv.opcode := bus.inv.opcode -// ??? -// -// bus.ack.arbitrationFrom(ack) + // //TODO manage lenght ? } - - - bus - } + }.bus } @@ -537,7 +641,7 @@ class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParam val memCmdSent = RegInit(False) setWhen (io.mem.cmd.ready) clearWhen (!io.cpu.writeBack.isStuck) val pending = withExclusive generate new Area{ val counter = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0) - val counterNext = counter + U(io.mem.cmd.fire && io.mem.cmd.last) - U(io.mem.rsp.valid && io.mem.rsp.last) + val counterNext = counter + U(io.mem.cmd.fire && io.mem.cmd.last) - ((io.mem.rsp.valid && io.mem.rsp.last) ? 
(io.mem.rsp.aggregated +^ 1) | 0) counter := counterNext val done = RegNext(counterNext === 0) @@ -554,7 +658,7 @@ class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParam val sync = withInvalidate generate new Area{ io.mem.sync.ready := True - + val syncCount = io.mem.sync.aggregated +^ 1 val syncContext = new Area{ val history = Mem(Bool, pendingMax) val wPtr, rPtr = Reg(UInt(log2Up(pendingMax)+1 bits)) init(0) @@ -564,7 +668,7 @@ class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParam } when(io.mem.sync.fire){ - rPtr := rPtr + 1 + rPtr := rPtr + syncCount } val uncached = history.readAsync(rPtr.resized) val full = RegNext(wPtr - rPtr >= pendingMax-1) @@ -573,7 +677,7 @@ class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParam def pending(inc : Bool, dec : Bool) = new Area { val pendingSync = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0) - val pendingSyncNext = pendingSync + U(io.mem.cmd.fire && io.mem.cmd.wr && inc) - U(io.mem.sync.fire && dec) + val pendingSyncNext = pendingSync + U(io.mem.cmd.fire && io.mem.cmd.wr && inc) - ((io.mem.sync.fire && dec) ? syncCount | 0) pendingSync := pendingSyncNext } @@ -582,7 +686,7 @@ class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParam def track(load : Bool, uncached : Boolean) = new Area { val counter = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0) - counter := counter - U(io.mem.sync.fire && counter =/= 0 && (if(uncached) syncContext.uncached else !syncContext.uncached)) + counter := counter - ((io.mem.sync.fire && counter =/= 0 && (if(uncached) syncContext.uncached else !syncContext.uncached)) ? syncCount | 0) when(load){ counter := (if(uncached) writeUncached.pendingSyncNext else writeCached.pendingSyncNext) } val busy = counter =/= 0 diff --git a/src/main/scala/vexriscv/plugin/CsrPlugin.scala b/src/main/scala/vexriscv/plugin/CsrPlugin.scala index 43dbfaf..23f3323 100644 --- a/src/main/scala/vexriscv/plugin/CsrPlugin.scala +++ b/src/main/scala/vexriscv/plugin/CsrPlugin.scala @@ -39,7 +39,7 @@ case class CsrPluginConfig( marchid : BigInt, mimpid : BigInt, mhartid : BigInt, - misaExtensionsInit : Int, + misaExtensionsInit : Int, misaAccess : CsrAccess, mtvecAccess : CsrAccess, mtvecInit : BigInt, @@ -68,6 +68,8 @@ case class CsrPluginConfig( satpAccess : CsrAccess = CsrAccess.NONE, medelegAccess : CsrAccess = CsrAccess.NONE, midelegAccess : CsrAccess = CsrAccess.NONE, + withExternalMhartid : Boolean = false, + mhartidWidth : Int = 0, pipelineCsrRead : Boolean = false, pipelinedInterrupt : Boolean = true, csrOhDecoder : Boolean = true, @@ -85,12 +87,12 @@ object CsrPluginConfig{ def small : CsrPluginConfig = small(0x00000020l) def smallest : CsrPluginConfig = smallest(0x00000020l) - def openSbi(hartId : Int, misa : Int) = CsrPluginConfig( + def openSbi(misa : Int) = CsrPluginConfig( catchIllegalAccess = true, mvendorid = 0, marchid = 0, mimpid = 0, - mhartid = hartId, + mhartid = 0, misaExtensionsInit = misa, misaAccess = CsrAccess.READ_ONLY, mtvecAccess = CsrAccess.READ_WRITE, //Could have been WRITE_ONLY :( @@ -387,6 +389,7 @@ class CsrPlugin(val config: CsrPluginConfig) extends Plugin[VexRiscv] with Excep var contextSwitching : Bool = null var thirdPartyWake : Bool = null var inWfi : Bool = null + var externalMhartId : UInt = null override def askWake(): Unit = thirdPartyWake := True @@ -515,6 +518,8 @@ class CsrPlugin(val config: CsrPluginConfig) extends Plugin[VexRiscv] with Excep pipeline.update(MPP, UInt(2 bits)) + + if(withExternalMhartid) 
externalMhartId = in UInt(mhartidWidth bits) } def inhibateInterrupts() : Unit = allowInterrupts := False @@ -600,7 +605,8 @@ class CsrPlugin(val config: CsrPluginConfig) extends Plugin[VexRiscv] with Excep if(mvendorid != null) READ_ONLY(CSR.MVENDORID, U(mvendorid)) if(marchid != null) READ_ONLY(CSR.MARCHID , U(marchid )) if(mimpid != null) READ_ONLY(CSR.MIMPID , U(mimpid )) - if(mhartid != null) READ_ONLY(CSR.MHARTID , U(mhartid )) + if(mhartid != null && !withExternalMhartid) READ_ONLY(CSR.MHARTID , U(mhartid )) + if(withExternalMhartid) READ_ONLY(CSR.MHARTID , externalMhartId) misaAccess(CSR.MISA, xlen-2 -> misa.base , 0 -> misa.extensions) //Machine CSR diff --git a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala index f133616..0b580d8 100644 --- a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala +++ b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala @@ -195,6 +195,7 @@ class DBusCachedPlugin(val config : DataCacheConfig, rsp.exclusive := RegNext(dBus.rsp.exclusive) rsp.error := RegNext(dBus.rsp.error) rsp.last := RegNext(dBus.rsp.last) + rsp.aggregated := RegNext(dBus.rsp.aggregated) rsp.data := RegNextWhen(dBus.rsp.data, dBus.rsp.valid && !cache.io.cpu.writeBack.keepMemRspData) rsp } From 18cce053a33649c783371662c4150cef5e9f501d Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Wed, 27 May 2020 14:19:17 +0200 Subject: [PATCH 71/91] Improve SingleInstructionLimiterPlugin to also include fetch stages --- .../vexriscv/plugin/SingleInstructionLimiterPlugin.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main/scala/vexriscv/plugin/SingleInstructionLimiterPlugin.scala b/src/main/scala/vexriscv/plugin/SingleInstructionLimiterPlugin.scala index deae767..c6c9706 100644 --- a/src/main/scala/vexriscv/plugin/SingleInstructionLimiterPlugin.scala +++ b/src/main/scala/vexriscv/plugin/SingleInstructionLimiterPlugin.scala @@ -9,7 +9,9 @@ class SingleInstructionLimiterPlugin() extends Plugin[VexRiscv] { override def build(pipeline: VexRiscv): Unit = { import pipeline._ import pipeline.config._ - - decode.arbitration.haltByOther.setWhen(List(decode,execute,memory,writeBack).map(_.arbitration.isValid).orR) + val fetcher = pipeline.service(classOf[IBusFetcher]) + when(fetcher.incoming() || List(decode,execute,memory,writeBack).map(_.arbitration.isValid).orR) { + fetcher.haltIt() + } } } From bc4a2c37473dc6ffe7d78c91c9d8b6aabb3c2a4a Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Wed, 27 May 2020 14:19:37 +0200 Subject: [PATCH 72/91] Fix SmpCluster jtag --- .../vexriscv/demo/smp/VexRiscvSmpCluster.scala | 17 ++++++++--------- .../demo/smp/VexRiscvSmpLitexCluster.scala | 2 -- src/main/scala/vexriscv/plugin/CsrPlugin.scala | 4 ++-- 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala index 38506f2..422b1e2 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -51,12 +51,12 @@ case class VexRiscvSmpCluster(p : VexRiscvSmpClusterParameter, } case _ => } + if(cpuId == 0) cpuConfig.plugins += new DebugPlugin(debugClockDomain) val core = new VexRiscv(cpuConfig) core.plugins.foreach { case plugin: IBusCachedPlugin => iBus = plugin.iBus.toBmb() case plugin: DBusCachedPlugin => dBus = plugin.dBus.toBmb().pipelined(cmdValid = true) case plugin: CsrPlugin => { - plugin.externalMhartId := cpuId 
plugin.softwareInterrupt := io.softwareInterrupts(cpuId) plugin.externalInterrupt := io.externalInterrupts(cpuId) plugin.timerInterrupt := io.timerInterrupts(cpuId) @@ -113,12 +113,12 @@ case class VexRiscvSmpCluster(p : VexRiscvSmpClusterParameter, object VexRiscvSmpClusterGen { - def vexRiscvConfig(hartIdWidth : Int, - hartId : Int, + def vexRiscvConfig(hartId : Int, ioRange : UInt => Bool = (x => x(31 downto 28) === 0xF), - resetVector : Long = 0x80000000l) = { - val iBusWidth = 128 - val dBusWidth = 64 + resetVector : Long = 0x80000000l, + iBusWidth : Int = 128, + dBusWidth : Int = 64) = { + val config = VexRiscvConfig( plugins = List( new MmuPlugin( @@ -213,7 +213,7 @@ object VexRiscvSmpClusterGen { mulUnrollFactor = 32, divUnrollFactor = 1 ), - new CsrPlugin(CsrPluginConfig.openSbi(misa = Riscv.misaToInt("imas")).copy(withExternalMhartid = true, mhartidWidth = hartIdWidth)), + new CsrPlugin(CsrPluginConfig.openSbi(mhartid = hartId, misa = Riscv.misaToInt("imas"))), new BranchPlugin( earlyBranch = false, catchAddressMisaligned = true, @@ -222,14 +222,13 @@ object VexRiscvSmpClusterGen { new YamlPlugin(s"cpu$hartId.yaml") ) ) - if(hartId == 0) config.plugins += new DebugPlugin(null) config } def vexRiscvCluster(cpuCount : Int, resetVector : Long = 0x80000000l) = VexRiscvSmpCluster( debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")), p = VexRiscvSmpClusterParameter( cpuConfigs = List.tabulate(cpuCount) { - vexRiscvConfig(log2Up(cpuCount), _, resetVector = resetVector) + vexRiscvConfig(_, resetVector = resetVector) } ) ) diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala index 3f3047f..30753c2 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala @@ -342,7 +342,6 @@ object VexRiscvLitexSmpClusterGen extends App { cluster = VexRiscvSmpClusterParameter( cpuConfigs = List.tabulate(cpuCount) { hartId => vexRiscvConfig( - hartIdWidth = log2Up(cpuCount), hartId = hartId, ioRange = address => address.msb, resetVector = 0 @@ -382,7 +381,6 @@ object VexRiscvLitexSmpClusterOpenSbi extends App{ cluster = VexRiscvSmpClusterParameter( cpuConfigs = List.tabulate(cpuCount) { hartId => vexRiscvConfig( - hartIdWidth = log2Up(cpuCount), hartId = hartId, ioRange = address => address(31 downto 28) === 0xF, resetVector = 0x80000000l diff --git a/src/main/scala/vexriscv/plugin/CsrPlugin.scala b/src/main/scala/vexriscv/plugin/CsrPlugin.scala index 23f3323..ebff1e0 100644 --- a/src/main/scala/vexriscv/plugin/CsrPlugin.scala +++ b/src/main/scala/vexriscv/plugin/CsrPlugin.scala @@ -87,12 +87,12 @@ object CsrPluginConfig{ def small : CsrPluginConfig = small(0x00000020l) def smallest : CsrPluginConfig = smallest(0x00000020l) - def openSbi(misa : Int) = CsrPluginConfig( + def openSbi(mhartid : Int, misa : Int) = CsrPluginConfig( catchIllegalAccess = true, mvendorid = 0, marchid = 0, mimpid = 0, - mhartid = 0, + mhartid = mhartid, misaExtensionsInit = misa, misaAccess = CsrAccess.READ_ONLY, mtvecAccess = CsrAccess.READ_WRITE, //Could have been WRITE_ONLY :( From 5e5c730959c79896e1c628ccff213d0a7999b30e Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Fri, 29 May 2020 10:56:55 +0200 Subject: [PATCH 73/91] Add LitexSmpDevCluster with per cpu dedicated litedram ports --- src/main/scala/vexriscv/demo/smp/Misc.scala | 242 ++++++++++++++++ .../demo/smp/VexRiscvSmpLitexCluster.scala | 221 --------------- 
.../demo/smp/VexRiscvSmpLitexDevCluster.scala | 262 ++++++++++++++++++ 3 files changed, 504 insertions(+), 221 deletions(-) create mode 100644 src/main/scala/vexriscv/demo/smp/Misc.scala create mode 100644 src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexDevCluster.scala diff --git a/src/main/scala/vexriscv/demo/smp/Misc.scala b/src/main/scala/vexriscv/demo/smp/Misc.scala new file mode 100644 index 0000000..a7965a4 --- /dev/null +++ b/src/main/scala/vexriscv/demo/smp/Misc.scala @@ -0,0 +1,242 @@ +package vexriscv.demo.smp + + +import spinal.core._ +import spinal.lib.bus.bmb._ +import spinal.lib.bus.wishbone.{Wishbone, WishboneConfig, WishboneSlaveFactory} +import spinal.lib.com.jtag.Jtag +import spinal.lib._ +import spinal.lib.bus.bmb.sim.{BmbMemoryMultiPort, BmbMemoryTester} +import spinal.lib.bus.misc.{AddressMapping, DefaultMapping, SizeMapping} +import spinal.lib.eda.bench.Bench +import spinal.lib.misc.Clint +import spinal.lib.sim.{SimData, SparseMemory, StreamDriver, StreamMonitor, StreamReadyRandomizer} +import vexriscv.demo.smp.VexRiscvLitexSmpClusterOpenSbi.{cpuCount, parameter} +import vexriscv.demo.smp.VexRiscvSmpClusterGen.vexRiscvConfig +import vexriscv.{VexRiscv, VexRiscvConfig} +import vexriscv.plugin.{CsrPlugin, DBusCachedPlugin, DebugPlugin, IBusCachedPlugin} + +import scala.collection.mutable +import scala.util.Random + +case class LiteDramNativeParameter(addressWidth : Int, dataWidth : Int) + +case class LiteDramNativeCmd(p : LiteDramNativeParameter) extends Bundle{ + val we = Bool() + val addr = UInt(p.addressWidth bits) +} + +case class LiteDramNativeWData(p : LiteDramNativeParameter) extends Bundle{ + val data = Bits(p.dataWidth bits) + val we = Bits(p.dataWidth/8 bits) +} + +case class LiteDramNativeRData(p : LiteDramNativeParameter) extends Bundle{ + val data = Bits(p.dataWidth bits) +} + + +case class LiteDramNative(p : LiteDramNativeParameter) extends Bundle with IMasterSlave { + val cmd = Stream(LiteDramNativeCmd(p)) + val wdata = Stream(LiteDramNativeWData(p)) + val rdata = Stream(LiteDramNativeRData(p)) + override def asMaster(): Unit = { + master(cmd, wdata) + slave(rdata) + } + + def fromBmb(bmb : Bmb, wdataFifoSize : Int, rdataFifoSize : Int) = { + val bridge = BmbToLiteDram( + bmbParameter = bmb.p, + liteDramParameter = this.p, + wdataFifoSize = wdataFifoSize, + rdataFifoSize = rdataFifoSize + ) + bridge.io.input << bmb + bridge.io.output <> this + bridge + } + + def simSlave(ram : SparseMemory,cd : ClockDomain, bmb : Bmb = null): Unit ={ + import spinal.core.sim._ + def bus = this + case class Cmd(address : Long, we : Boolean) + case class WData(data : BigInt, we : Long) + val cmdQueue = mutable.Queue[Cmd]() + val wdataQueue = mutable.Queue[WData]() + val rdataQueue = mutable.Queue[BigInt]() + + + case class Ref(address : Long, data : BigInt, we : Long, time : Long) + val ref = mutable.Queue[Ref]() + if(bmb != null) StreamMonitor(bmb.cmd, cd){p => + if(bmb.cmd.opcode.toInt == 1) ref.enqueue(Ref(p.fragment.address.toLong, p.fragment.data.toBigInt, p.fragment.mask.toLong, simTime())) + } + + var writeCmdCounter, writeDataCounter = 0 + StreamReadyRandomizer(bus.cmd, cd).factor = 0.5f + StreamMonitor(bus.cmd, cd) { t => + cmdQueue.enqueue(Cmd(t.addr.toLong * (p.dataWidth/8) , t.we.toBoolean)) + if(t.we.toBoolean) writeCmdCounter += 1 + } + + StreamReadyRandomizer(bus.wdata, cd).factor = 0.5f + StreamMonitor(bus.wdata, cd) { p => + writeDataCounter += 1 + // if(p.data.toBigInt == BigInt("00000002000000020000000200000002",16)){ + // println("ASD") + // } + 
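// Editorial sketch, not part of the patch: the fromBmb() helper defined above is how the
// Litex cluster tops attach each CPU's Bmb memory bus to a LiteDRAM native port. A minimal
// use, assuming a Component io bundle and an existing Bmb master named someBmbMaster
// (hypothetical), with the 128-bit port width used later in this series:
val dram = master(LiteDramNative(LiteDramNativeParameter(addressWidth = 32, dataWidth = 128)))
dram.fromBmb(someBmbMaster, wdataFifoSize = 32, rdataFifoSize = 32)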
wdataQueue.enqueue(WData(p.data.toBigInt, p.we.toLong)) + } + + // new SimStreamAssert(cmd,cd) + // new SimStreamAssert(wdata,cd) + // new SimStreamAssert(rdata,cd) + + cd.onSamplings{ + if(writeDataCounter-writeCmdCounter > 2){ + println("miaou") + } + if(cmdQueue.nonEmpty && Random.nextFloat() < 0.5){ + val cmd = cmdQueue.head + if(cmd.we){ + if(wdataQueue.nonEmpty){ + // if(cmd.address == 0xc02ae850l) { + // println(s"! $writeCmdCounter $writeDataCounter") + // } + cmdQueue.dequeue() + val wdata = wdataQueue.dequeue() + val raw = wdata.data.toByteArray + val left = wdata.data.toByteArray.size-1 + if(bmb != null){ + assert(ref.nonEmpty) + assert((ref.head.address & 0xFFFFFFF0l) == cmd.address) + assert(ref.head.data == wdata.data) + assert(ref.head.we == wdata.we) + ref.dequeue() + } + // if(cmd.address == 0xc02ae850l) { + // println(s"$cmd $wdata ${simTime()}") + // } + for(i <- 0 until p.dataWidth/8){ + + + if(((wdata.we >> i) & 1) != 0) { + // if(cmd.address == 0xc02ae850l) { + // println(s"W $i ${ if (left - i >= 0) raw(left - i) else 0}") + // } + ram.write(cmd.address + i, if (left - i >= 0) raw(left - i) else 0) + } + } + } + } else { + cmdQueue.dequeue() + val value = new Array[Byte](p.dataWidth/8+1) + val left = value.size-1 + for(i <- 0 until p.dataWidth/8) { + value(left-i) = ram.read(cmd.address+i) + } + rdataQueue.enqueue(BigInt(value)) + } + } + } + + StreamDriver(bus.rdata, cd){ p => + if(rdataQueue.isEmpty){ + false + } else { + p.data #= rdataQueue.dequeue() + true + } + } + } +} + + + +case class BmbToLiteDram(bmbParameter : BmbParameter, + liteDramParameter : LiteDramNativeParameter, + wdataFifoSize : Int, + rdataFifoSize : Int) extends Component{ + val io = new Bundle { + val input = slave(Bmb(bmbParameter)) + val output = master(LiteDramNative(liteDramParameter)) + } + + val resized = io.input.resize(liteDramParameter.dataWidth) + val unburstified = resized.unburstify() + case class Context() extends Bundle { + val context = Bits(unburstified.p.contextWidth bits) + val source = UInt(unburstified.p.sourceWidth bits) + val isWrite = Bool() + } + + assert(isPow2(rdataFifoSize)) + val pendingRead = Reg(UInt(log2Up(rdataFifoSize) + 1 bits)) init(0) + + val halt = Bool() + val (cmdFork, dataFork) = StreamFork2(unburstified.cmd.haltWhen(halt)) + val outputCmd = Stream(LiteDramNativeCmd(liteDramParameter)) + outputCmd.arbitrationFrom(cmdFork.haltWhen(pendingRead.msb)) + outputCmd.addr := (cmdFork.address >> log2Up(liteDramParameter.dataWidth/8)).resized + outputCmd.we := cmdFork.isWrite + + io.output.cmd <-< outputCmd + + if(bmbParameter.canWrite) { + val wData = Stream(LiteDramNativeWData(liteDramParameter)) + wData.arbitrationFrom(dataFork.throwWhen(dataFork.isRead)) + wData.data := dataFork.data + wData.we := dataFork.mask + io.output.wdata << wData.queueLowLatency(wdataFifoSize, latency = 1) //TODO queue low latency + } else { + dataFork.ready := True + io.output.wdata.valid := False + io.output.wdata.data.assignDontCare() + io.output.wdata.we.assignDontCare() + } + + val cmdContext = Stream(Context()) + cmdContext.valid := unburstified.cmd.fire + cmdContext.context := unburstified.cmd.context + cmdContext.source := unburstified.cmd.source + cmdContext.isWrite := unburstified.cmd.isWrite + halt := !cmdContext.ready + + val rspContext = cmdContext.queue(rdataFifoSize) + val rdataFifo = io.output.rdata.queueLowLatency(rdataFifoSize, latency = 1) + + rdataFifo.ready := unburstified.rsp.fire && !rspContext.isWrite + rspContext.ready := unburstified.rsp.fire + 
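// Editorial note, not part of the patch: LiteDRAM native commands carry word addresses, so
// BmbToLiteDram shifts the Bmb byte address right by log2Up(dataWidth/8) while simSlave()
// above multiplies it back by dataWidth/8 before touching the SparseMemory. Worked example
// for a 128-bit port (16 bytes per beat, shift by 4):
//   byte address 0x80000040 -> LiteDRAM addr 0x08000004 -> 0x08000004 * 16 = 0x80000040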
unburstified.rsp.valid := rspContext.valid && (rspContext.isWrite || rdataFifo.valid) + unburstified.rsp.setSuccess() + unburstified.rsp.last := True + unburstified.rsp.source := rspContext.source + unburstified.rsp.context := rspContext.context + unburstified.rsp.data := rdataFifo.data + + + pendingRead := pendingRead + U(outputCmd.fire && !outputCmd.we) - U(rdataFifo.fire) +} + +object BmbToLiteDramTester extends App{ + import spinal.core.sim._ + SimConfig.withWave.compile(BmbToLiteDram( + bmbParameter = BmbParameter( + addressWidth = 20, + dataWidth = 32, + lengthWidth = 6, + sourceWidth = 4, + contextWidth = 16 + ), + liteDramParameter = LiteDramNativeParameter( + addressWidth = 20, + dataWidth = 128 + ), + wdataFifoSize = 16, + rdataFifoSize = 16 + )).doSimUntilVoid(seed = 42){dut => + val tester = new BmbMemoryTester(dut.io.input, dut.clockDomain, rspCounterTarget = 3000) + dut.io.output.simSlave(tester.memory.memory, dut.clockDomain) + } +} \ No newline at end of file diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala index 30753c2..f73c859 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala @@ -18,227 +18,6 @@ import vexriscv.plugin.{CsrPlugin, DBusCachedPlugin, DebugPlugin, IBusCachedPlug import scala.collection.mutable import scala.util.Random -case class LiteDramNativeParameter(addressWidth : Int, dataWidth : Int) - -case class LiteDramNativeCmd(p : LiteDramNativeParameter) extends Bundle{ - val we = Bool() - val addr = UInt(p.addressWidth bits) -} - -case class LiteDramNativeWData(p : LiteDramNativeParameter) extends Bundle{ - val data = Bits(p.dataWidth bits) - val we = Bits(p.dataWidth/8 bits) -} - -case class LiteDramNativeRData(p : LiteDramNativeParameter) extends Bundle{ - val data = Bits(p.dataWidth bits) -} - - -case class LiteDramNative(p : LiteDramNativeParameter) extends Bundle with IMasterSlave { - val cmd = Stream(LiteDramNativeCmd(p)) - val wdata = Stream(LiteDramNativeWData(p)) - val rdata = Stream(LiteDramNativeRData(p)) - override def asMaster(): Unit = { - master(cmd, wdata) - slave(rdata) - } - - def fromBmb(bmb : Bmb, wdataFifoSize : Int, rdataFifoSize : Int) = { - val bridge = BmbToLiteDram( - bmbParameter = bmb.p, - liteDramParameter = this.p, - wdataFifoSize = wdataFifoSize, - rdataFifoSize = rdataFifoSize - ) - bridge.io.input << bmb - bridge.io.output <> this - bridge - } - - def simSlave(ram : SparseMemory,cd : ClockDomain, bmb : Bmb = null): Unit ={ - import spinal.core.sim._ - def bus = this - case class Cmd(address : Long, we : Boolean) - case class WData(data : BigInt, we : Long) - val cmdQueue = mutable.Queue[Cmd]() - val wdataQueue = mutable.Queue[WData]() - val rdataQueue = mutable.Queue[BigInt]() - - - case class Ref(address : Long, data : BigInt, we : Long, time : Long) - val ref = mutable.Queue[Ref]() - if(bmb != null) StreamMonitor(bmb.cmd, cd){p => - if(bmb.cmd.opcode.toInt == 1) ref.enqueue(Ref(p.fragment.address.toLong, p.fragment.data.toBigInt, p.fragment.mask.toLong, simTime())) - } - - var writeCmdCounter, writeDataCounter = 0 - StreamReadyRandomizer(bus.cmd, cd).factor = 0.5f - StreamMonitor(bus.cmd, cd) { t => - cmdQueue.enqueue(Cmd(t.addr.toLong * (p.dataWidth/8) , t.we.toBoolean)) - if(t.we.toBoolean) writeCmdCounter += 1 - } - - StreamReadyRandomizer(bus.wdata, cd).factor = 0.5f - StreamMonitor(bus.wdata, cd) { p => - writeDataCounter += 1 -// 
if(p.data.toBigInt == BigInt("00000002000000020000000200000002",16)){ -// println("ASD") -// } - wdataQueue.enqueue(WData(p.data.toBigInt, p.we.toLong)) - } - -// new SimStreamAssert(cmd,cd) -// new SimStreamAssert(wdata,cd) -// new SimStreamAssert(rdata,cd) - - cd.onSamplings{ - if(writeDataCounter-writeCmdCounter > 2){ - println("miaou") - } - if(cmdQueue.nonEmpty && Random.nextFloat() < 0.5){ - val cmd = cmdQueue.head - if(cmd.we){ - if(wdataQueue.nonEmpty){ -// if(cmd.address == 0xc02ae850l) { -// println(s"! $writeCmdCounter $writeDataCounter") -// } - cmdQueue.dequeue() - val wdata = wdataQueue.dequeue() - val raw = wdata.data.toByteArray - val left = wdata.data.toByteArray.size-1 - if(bmb != null){ - assert(ref.nonEmpty) - assert((ref.head.address & 0xFFFFFFF0l) == cmd.address) - assert(ref.head.data == wdata.data) - assert(ref.head.we == wdata.we) - ref.dequeue() - } -// if(cmd.address == 0xc02ae850l) { -// println(s"$cmd $wdata ${simTime()}") -// } - for(i <- 0 until p.dataWidth/8){ - - - if(((wdata.we >> i) & 1) != 0) { -// if(cmd.address == 0xc02ae850l) { -// println(s"W $i ${ if (left - i >= 0) raw(left - i) else 0}") -// } - ram.write(cmd.address + i, if (left - i >= 0) raw(left - i) else 0) - } - } - } - } else { - cmdQueue.dequeue() - val value = new Array[Byte](p.dataWidth/8+1) - val left = value.size-1 - for(i <- 0 until p.dataWidth/8) { - value(left-i) = ram.read(cmd.address+i) - } - rdataQueue.enqueue(BigInt(value)) - } - } - } - - StreamDriver(bus.rdata, cd){ p => - if(rdataQueue.isEmpty){ - false - } else { - p.data #= rdataQueue.dequeue() - true - } - } - } -} - - - -case class BmbToLiteDram(bmbParameter : BmbParameter, - liteDramParameter : LiteDramNativeParameter, - wdataFifoSize : Int, - rdataFifoSize : Int) extends Component{ - val io = new Bundle { - val input = slave(Bmb(bmbParameter)) - val output = master(LiteDramNative(liteDramParameter)) - } - - val resized = io.input.resize(liteDramParameter.dataWidth) - val unburstified = resized.unburstify() - case class Context() extends Bundle { - val context = Bits(unburstified.p.contextWidth bits) - val source = UInt(unburstified.p.sourceWidth bits) - val isWrite = Bool() - } - - assert(isPow2(rdataFifoSize)) - val pendingRead = Reg(UInt(log2Up(rdataFifoSize) + 1 bits)) init(0) - - val halt = Bool() - val (cmdFork, dataFork) = StreamFork2(unburstified.cmd.haltWhen(halt)) - val outputCmd = Stream(LiteDramNativeCmd(liteDramParameter)) - outputCmd.arbitrationFrom(cmdFork.haltWhen(pendingRead.msb)) - outputCmd.addr := (cmdFork.address >> log2Up(liteDramParameter.dataWidth/8)).resized - outputCmd.we := cmdFork.isWrite - - io.output.cmd <-< outputCmd - - if(bmbParameter.canWrite) { - val wData = Stream(LiteDramNativeWData(liteDramParameter)) - wData.arbitrationFrom(dataFork.throwWhen(dataFork.isRead)) - wData.data := dataFork.data - wData.we := dataFork.mask - io.output.wdata << wData.queueLowLatency(wdataFifoSize, latency = 1) //TODO queue low latency - } else { - dataFork.ready := True - io.output.wdata.valid := False - io.output.wdata.data.assignDontCare() - io.output.wdata.we.assignDontCare() - } - - val cmdContext = Stream(Context()) - cmdContext.valid := unburstified.cmd.fire - cmdContext.context := unburstified.cmd.context - cmdContext.source := unburstified.cmd.source - cmdContext.isWrite := unburstified.cmd.isWrite - halt := !cmdContext.ready - - val rspContext = cmdContext.queue(rdataFifoSize) - val rdataFifo = io.output.rdata.queueLowLatency(rdataFifoSize, latency = 1) - - rdataFifo.ready := unburstified.rsp.fire 
&& !rspContext.isWrite - rspContext.ready := unburstified.rsp.fire - unburstified.rsp.valid := rspContext.valid && (rspContext.isWrite || rdataFifo.valid) - unburstified.rsp.setSuccess() - unburstified.rsp.last := True - unburstified.rsp.source := rspContext.source - unburstified.rsp.context := rspContext.context - unburstified.rsp.data := rdataFifo.data - - - pendingRead := pendingRead + U(outputCmd.fire && !outputCmd.we) - U(rdataFifo.fire) -} - -object BmbToLiteDramTester extends App{ - import spinal.core.sim._ - SimConfig.withWave.compile(BmbToLiteDram( - bmbParameter = BmbParameter( - addressWidth = 20, - dataWidth = 32, - lengthWidth = 6, - sourceWidth = 4, - contextWidth = 16 - ), - liteDramParameter = LiteDramNativeParameter( - addressWidth = 20, - dataWidth = 128 - ), - wdataFifoSize = 16, - rdataFifoSize = 16 - )).doSimUntilVoid(seed = 42){dut => - val tester = new BmbMemoryTester(dut.io.input, dut.clockDomain, rspCounterTarget = 3000) - dut.io.output.simSlave(tester.memory.memory, dut.clockDomain) - } -} case class VexRiscvLitexSmpClusterParameter( cluster : VexRiscvSmpClusterParameter, liteDram : LiteDramNativeParameter, diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexDevCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexDevCluster.scala new file mode 100644 index 0000000..fbc184e --- /dev/null +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexDevCluster.scala @@ -0,0 +1,262 @@ +package vexriscv.demo.smp + +import spinal.core._ +import spinal.lib.bus.bmb._ +import spinal.lib.bus.wishbone.{Wishbone, WishboneConfig, WishboneSlaveFactory} +import spinal.lib.com.jtag.Jtag +import spinal.lib._ +import spinal.lib.bus.bmb.sim.{BmbMemoryMultiPort, BmbMemoryTester} +import spinal.lib.bus.misc.{AddressMapping, DefaultMapping, SizeMapping} +import spinal.lib.eda.bench.Bench +import spinal.lib.misc.Clint +import spinal.lib.sim.{SimData, SparseMemory, StreamDriver, StreamMonitor, StreamReadyRandomizer} +import vexriscv.demo.smp.VexRiscvLitexSmpDevClusterOpenSbi.{cpuCount, parameter} +import vexriscv.demo.smp.VexRiscvSmpClusterGen.vexRiscvConfig +import vexriscv.{VexRiscv, VexRiscvConfig} +import vexriscv.plugin.{CsrPlugin, DBusCachedPlugin, DebugPlugin, IBusCachedPlugin} + +import scala.collection.mutable +import scala.util.Random + + +case class VexRiscvLitexSmpDevClusterParameter( cluster : VexRiscvSmpClusterParameter, + liteDram : LiteDramNativeParameter, + liteDramMapping : AddressMapping) + +//addAttribute("""mark_debug = "true"""") +case class VexRiscvLitexSmpDevCluster(p : VexRiscvLitexSmpDevClusterParameter, + debugClockDomain : ClockDomain) extends Component{ + + val peripheralWishboneConfig = WishboneConfig( + addressWidth = 30, + dataWidth = 32, + selWidth = 4, + useERR = true, + useBTE = true, + useCTI = true + ) + + val cpuCount = p.cluster.cpuConfigs.size + + val io = new Bundle { + val dMem = Vec(master(LiteDramNative(p.liteDram)), cpuCount) + val iMem = Vec(master(LiteDramNative(p.liteDram)), cpuCount) + val peripheral = master(Wishbone(peripheralWishboneConfig)) + val clint = slave(Wishbone(Clint.getWisboneConfig())) + val externalInterrupts = in Bits(p.cluster.cpuConfigs.size bits) + val externalSupervisorInterrupts = in Bits(p.cluster.cpuConfigs.size bits) + val jtag = slave(Jtag()) + val debugReset = out Bool() + } + val clint = Clint(cpuCount) + clint.driveFrom(WishboneSlaveFactory(io.clint)) + + val cluster = VexRiscvSmpCluster(p.cluster, debugClockDomain) + cluster.io.externalInterrupts <> io.externalInterrupts + 
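// Editorial note, not part of the patch: a single CLINT is shared by all harts. It is
// driven through the io.clint Wishbone slave and its per-hart timer/software interrupt
// lines are fanned out to the cluster. In the simulation testbenches further down it is
// decoded at peripheral byte addresses 0xF0010000-0xF001FFFF, i.e. (ADR << 2) of the
// word-addressed Wishbone: a Wishbone ADR of 0x3C004000 selects byte address
// 0x3C004000 << 2 = 0xF0010000, the first location of that window.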
cluster.io.externalSupervisorInterrupts <> io.externalSupervisorInterrupts + cluster.io.jtag <> io.jtag + cluster.io.debugReset <> io.debugReset + cluster.io.timerInterrupts <> B(clint.harts.map(_.timerInterrupt)) + cluster.io.softwareInterrupts <> B(clint.harts.map(_.softwareInterrupt)) + + val dBusDecoder = BmbDecoderOutOfOrder( + p = cluster.io.dMem.p, + mappings = Seq(DefaultMapping, p.liteDramMapping), + capabilities = Seq(cluster.io.dMem.p, cluster.io.dMem.p), + pendingRspTransactionMax = 32 + ) +// val dBusDecoder = BmbDecoderOut( +// p = cluster.io.dMem.p, +// mappings = Seq(DefaultMapping, p.liteDramMapping), +// capabilities = Seq(cluster.io.dMem.p, cluster.io.dMem.p), +// pendingMax = 31 +// ) + dBusDecoder.io.input << cluster.io.dMem.pipelined(cmdValid = true, cmdReady = true, rspValid = true) + + + val perIBus = for(id <- 0 until cpuCount) yield new Area{ + val decoder = BmbDecoder( + p = cluster.io.iMems(id).p, + mappings = Seq(DefaultMapping, p.liteDramMapping), + capabilities = Seq(cluster.io.iMems(id).p,cluster.io.iMems(id).p), + pendingMax = 15 + ) + + decoder.io.input << cluster.io.iMems(id) + io.iMem(id).fromBmb(decoder.io.outputs(1), wdataFifoSize = 0, rdataFifoSize = 32) + val toPeripheral = decoder.io.outputs(0).resize(dataWidth = 32) + } + + val dBusDecoderToPeripheral = dBusDecoder.io.outputs(0).resize(dataWidth = 32).pipelined(cmdHalfRate = true, rspValid = true) + + val peripheralAccessLength = Math.max(perIBus(0).toPeripheral.p.lengthWidth, dBusDecoder.io.outputs(0).p.lengthWidth) + val peripheralArbiter = BmbArbiter( + p = dBusDecoder.io.outputs(0).p.copy( + sourceWidth = List(perIBus(0).toPeripheral, dBusDecoderToPeripheral).map(_.p.sourceWidth).max + log2Up(cpuCount + 1), + contextWidth = List(perIBus(0).toPeripheral, dBusDecoderToPeripheral).map(_.p.contextWidth).max, + lengthWidth = peripheralAccessLength, + dataWidth = 32 + ), + portCount = cpuCount+1, + lowerFirstPriority = true + ) + + for(id <- 0 until cpuCount){ + peripheralArbiter.io.inputs(id) << perIBus(id).toPeripheral + } + peripheralArbiter.io.inputs(cpuCount) << dBusDecoderToPeripheral + + val peripheralWishbone = peripheralArbiter.io.output.pipelined(cmdValid = true).toWishbone() + io.peripheral << peripheralWishbone + + + val dBusDemux = BmbSourceDecoder(dBusDecoder.io.outputs(1).p) + dBusDemux.io.input << dBusDecoder.io.outputs(1) + val dMemBridge = for(id <- 0 until cpuCount) yield { + io.dMem(id).fromBmb(dBusDemux.io.outputs(id), wdataFifoSize = 32, rdataFifoSize = 32) + } + +} + +object VexRiscvLitexSmpDevClusterGen extends App { + for(cpuCount <- List(1,2,4,8)) { + def parameter = VexRiscvLitexSmpDevClusterParameter( + cluster = VexRiscvSmpClusterParameter( + cpuConfigs = List.tabulate(cpuCount) { hartId => + vexRiscvConfig( + hartId = hartId, + ioRange = address => address.msb, + resetVector = 0 + ) + } + ), + liteDram = LiteDramNativeParameter(addressWidth = 32, dataWidth = 128), + liteDramMapping = SizeMapping(0x40000000l, 0x40000000l) + ) + + def dutGen = { + val toplevel = VexRiscvLitexSmpDevCluster( + p = parameter, + debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")) + ) + toplevel + } + + val genConfig = SpinalConfig().addStandardMemBlackboxing(blackboxByteEnables) + // genConfig.generateVerilog(Bench.compressIo(dutGen)) + genConfig.generateVerilog(dutGen.setDefinitionName(s"VexRiscvLitexSmpDevCluster_${cpuCount}c")) + } + +} + + +object VexRiscvLitexSmpDevClusterOpenSbi extends App{ + import spinal.core.sim._ + + val simConfig = SimConfig + 
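// Editorial note, not part of the patch: both decoders above use mappings =
// Seq(DefaultMapping, p.liteDramMapping), so outputs(1) is the LiteDRAM window while
// outputs(0) (DefaultMapping) catches everything else and feeds the Wishbone peripheral
// bus. With the OpenSBI simulation mapping SizeMapping(0x80000000l, 0x70000000l), a load
// from 0xC0000000 (where the kernel Image is placed) is routed to a dedicated LiteDRAM
// port, while an access to 0xF0000000 (the simulated UART) falls through to outputs(0).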
simConfig.withWave + simConfig.allOptimisation + + val cpuCount = 4 + + def parameter = VexRiscvLitexSmpDevClusterParameter( + cluster = VexRiscvSmpClusterParameter( + cpuConfigs = List.tabulate(cpuCount) { hartId => + vexRiscvConfig( + hartId = hartId, + ioRange = address => address(31 downto 28) === 0xF, + resetVector = 0x80000000l + ) + } + ), + liteDram = LiteDramNativeParameter(addressWidth = 32, dataWidth = 128), + liteDramMapping = SizeMapping(0x80000000l, 0x70000000l) + ) + + def dutGen = { + val top = VexRiscvLitexSmpDevCluster( + p = parameter, + debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")) + ) + top.rework{ + top.io.clint.setAsDirectionLess.allowDirectionLessIo + top.io.peripheral.setAsDirectionLess.allowDirectionLessIo.simPublic() + + val hit = (top.io.peripheral.ADR <<2 >= 0xF0010000l && top.io.peripheral.ADR<<2 < 0xF0020000l) + top.io.clint.CYC := top.io.peripheral.CYC && hit + top.io.clint.STB := top.io.peripheral.STB + top.io.clint.WE := top.io.peripheral.WE + top.io.clint.ADR := top.io.peripheral.ADR.resized + top.io.clint.DAT_MOSI := top.io.peripheral.DAT_MOSI + top.io.peripheral.DAT_MISO := top.io.clint.DAT_MISO + top.io.peripheral.ACK := top.io.peripheral.CYC && (!hit || top.io.clint.ACK) + top.io.peripheral.ERR := False + +// top.dMemBridge.unburstified.cmd.simPublic() + } + top + } + simConfig.compile(dutGen).doSimUntilVoid(seed = 42){dut => + dut.clockDomain.forkStimulus(10) + fork { + dut.debugClockDomain.resetSim #= false + sleep (0) + dut.debugClockDomain.resetSim #= true + sleep (10) + dut.debugClockDomain.resetSim #= false + } + + + val ram = SparseMemory() + ram.loadBin(0x80000000l, "../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_jump.bin") + ram.loadBin(0xC0000000l, "../buildroot/output/images/Image") + ram.loadBin(0xC1000000l, "../buildroot/output/images/dtb") + ram.loadBin(0xC2000000l, "../buildroot/output/images/rootfs.cpio") + + for(id <- 0 until cpuCount) { + dut.io.iMem(id).simSlave(ram, dut.clockDomain) + dut.io.dMem(id).simSlave(ram, dut.clockDomain) + } + + dut.io.externalInterrupts #= 0 + dut.io.externalSupervisorInterrupts #= 0 + + dut.clockDomain.onSamplings{ + if(dut.io.peripheral.CYC.toBoolean){ + (dut.io.peripheral.ADR.toLong << 2) match { + case 0xF0000000l => print(dut.io.peripheral.DAT_MOSI.toLong.toChar) + case 0xF0000004l => dut.io.peripheral.DAT_MISO #= (if(System.in.available() != 0) System.in.read() else 0xFFFFFFFFl) + case _ => + } +// println(f"${dut.io.peripheral.ADR.toLong}%x") + } + } + +// fork{ +// disableSimWave() +// val atMs = 3790 +// val durationMs = 5 +// sleep(atMs*1000000l) +// enableSimWave() +// println("** enableSimWave **") +// sleep(durationMs*1000000l) +// println("** disableSimWave **") +// while(true) { +// disableSimWave() +// sleep(100000 * 10) +// enableSimWave() +// sleep( 100 * 10) +// } +// // simSuccess() +// } + + fork{ + while(true) { + disableSimWave() + sleep(100000 * 10) + enableSimWave() + sleep( 100 * 10) + } + } + } + } \ No newline at end of file From 08189ee9073d84e6d7c0df2d3e4cbbaa3d632191 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Tue, 2 Jun 2020 19:13:55 +0200 Subject: [PATCH 74/91] DebugPlugin now support Bmb --- .../scala/vexriscv/plugin/DebugPlugin.scala | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/src/main/scala/vexriscv/plugin/DebugPlugin.scala b/src/main/scala/vexriscv/plugin/DebugPlugin.scala index 91185b8..9f2a243 100644 --- a/src/main/scala/vexriscv/plugin/DebugPlugin.scala +++ 
b/src/main/scala/vexriscv/plugin/DebugPlugin.scala @@ -9,6 +9,8 @@ import spinal.core._ import spinal.lib._ import spinal.lib.bus.amba3.apb.{Apb3, Apb3Config} import spinal.lib.bus.avalon.{AvalonMM, AvalonMMConfig} +import spinal.lib.bus.bmb.{Bmb, BmbAccessParameter, BmbParameter} +import spinal.lib.bus.simple.PipelinedMemoryBus import scala.collection.mutable.ArrayBuffer @@ -22,6 +24,16 @@ case class DebugExtensionRsp() extends Bundle{ val data = Bits(32 bit) } +object DebugExtensionBus{ + def getBmbAccessParameter(source : BmbAccessParameter) = BmbAccessParameter( + addressWidth = 8, + dataWidth = 32, + lengthWidth = 2, + sourceWidth = source.sourceWidth, + contextWidth = source.contextWidth + ) +} + case class DebugExtensionBus() extends Bundle with IMasterSlave{ val cmd = Stream(DebugExtensionCmd()) val rsp = DebugExtensionRsp() //one cycle latency @@ -63,6 +75,41 @@ case class DebugExtensionBus() extends Bundle with IMasterSlave{ bus } + def fromPipelinedMemoryBus(): PipelinedMemoryBus ={ + val bus = PipelinedMemoryBus(32, 32) + + cmd.arbitrationFrom(bus.cmd) + cmd.wr := bus.cmd.write + cmd.address := bus.cmd.address.resized + cmd.data := bus.cmd.data + + bus.rsp.valid := RegNext(cmd.fire) init(False) + bus.rsp.data := rsp.data + + bus + } + + def fromBmb(): Bmb ={ + val bus = Bmb(BmbParameter( + addressWidth = 8, + dataWidth = 32, + lengthWidth = 2, + sourceWidth = 0, + contextWidth = 0 + )) + + cmd.arbitrationFrom(bus.cmd) + cmd.wr := bus.cmd.isWrite + cmd.address := bus.cmd.address + cmd.data := bus.cmd.data + + bus.rsp.valid := RegNext(cmd.fire) init(False) + bus.rsp.data := rsp.data + bus.rsp.last := True + + bus + } + def from(c : SystemDebuggerConfig) : SystemDebuggerMemBus = { val mem = SystemDebuggerMemBus(c) cmd.valid := mem.cmd.valid From db50f04653606e1ee9498552867b5d8a7b125217 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Sun, 31 May 2020 16:54:05 +0200 Subject: [PATCH 75/91] Add litexMpCluster --- .../demo/smp/VexRiscvSmpLitexCluster.scala | 1 + ....scala => VexRiscvSmpLitexMpCluster.scala} | 26 +++++++++---------- 2 files changed, 14 insertions(+), 13 deletions(-) rename src/main/scala/vexriscv/demo/smp/{VexRiscvSmpLitexDevCluster.scala => VexRiscvSmpLitexMpCluster.scala} (91%) diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala index f73c859..acf8e4e 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala @@ -115,6 +115,7 @@ case class VexRiscvLitexSmpCluster(p : VexRiscvLitexSmpClusterParameter, io.peripheral << peripheralWishbone } +//ifconfig eth0 192.168.0.50 netmask 255.255.255.0 up object VexRiscvLitexSmpClusterGen extends App { for(cpuCount <- List(1,2,4,8)) { def parameter = VexRiscvLitexSmpClusterParameter( diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexDevCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexMpCluster.scala similarity index 91% rename from src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexDevCluster.scala rename to src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexMpCluster.scala index fbc184e..6883343 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexDevCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexMpCluster.scala @@ -10,7 +10,7 @@ import spinal.lib.bus.misc.{AddressMapping, DefaultMapping, SizeMapping} import spinal.lib.eda.bench.Bench import spinal.lib.misc.Clint import spinal.lib.sim.{SimData, 
SparseMemory, StreamDriver, StreamMonitor, StreamReadyRandomizer} -import vexriscv.demo.smp.VexRiscvLitexSmpDevClusterOpenSbi.{cpuCount, parameter} +import vexriscv.demo.smp.VexRiscvLitexSmpMpClusterOpenSbi.{cpuCount, parameter} import vexriscv.demo.smp.VexRiscvSmpClusterGen.vexRiscvConfig import vexriscv.{VexRiscv, VexRiscvConfig} import vexriscv.plugin.{CsrPlugin, DBusCachedPlugin, DebugPlugin, IBusCachedPlugin} @@ -19,12 +19,12 @@ import scala.collection.mutable import scala.util.Random -case class VexRiscvLitexSmpDevClusterParameter( cluster : VexRiscvSmpClusterParameter, +case class VexRiscvLitexSmpMpClusterParameter( cluster : VexRiscvSmpClusterParameter, liteDram : LiteDramNativeParameter, liteDramMapping : AddressMapping) //addAttribute("""mark_debug = "true"""") -case class VexRiscvLitexSmpDevCluster(p : VexRiscvLitexSmpDevClusterParameter, +case class VexRiscvLitexSmpMpCluster(p : VexRiscvLitexSmpMpClusterParameter, debugClockDomain : ClockDomain) extends Component{ val peripheralWishboneConfig = WishboneConfig( @@ -83,8 +83,8 @@ case class VexRiscvLitexSmpDevCluster(p : VexRiscvLitexSmpDevClusterParameter, ) decoder.io.input << cluster.io.iMems(id) - io.iMem(id).fromBmb(decoder.io.outputs(1), wdataFifoSize = 0, rdataFifoSize = 32) - val toPeripheral = decoder.io.outputs(0).resize(dataWidth = 32) + io.iMem(id).fromBmb(decoder.io.outputs(1).pipelined(cmdHalfRate = true), wdataFifoSize = 0, rdataFifoSize = 32) + val toPeripheral = decoder.io.outputs(0).resize(dataWidth = 32).pipelined(cmdHalfRate = true, rspValid = true) } val dBusDecoderToPeripheral = dBusDecoder.io.outputs(0).resize(dataWidth = 32).pipelined(cmdHalfRate = true, rspValid = true) @@ -111,16 +111,16 @@ case class VexRiscvLitexSmpDevCluster(p : VexRiscvLitexSmpDevClusterParameter, val dBusDemux = BmbSourceDecoder(dBusDecoder.io.outputs(1).p) - dBusDemux.io.input << dBusDecoder.io.outputs(1) + dBusDemux.io.input << dBusDecoder.io.outputs(1).pipelined(cmdValid = true, cmdReady = true,rspValid = true) val dMemBridge = for(id <- 0 until cpuCount) yield { io.dMem(id).fromBmb(dBusDemux.io.outputs(id), wdataFifoSize = 32, rdataFifoSize = 32) } } -object VexRiscvLitexSmpDevClusterGen extends App { +object VexRiscvLitexSmpMpClusterGen extends App { for(cpuCount <- List(1,2,4,8)) { - def parameter = VexRiscvLitexSmpDevClusterParameter( + def parameter = VexRiscvLitexSmpMpClusterParameter( cluster = VexRiscvSmpClusterParameter( cpuConfigs = List.tabulate(cpuCount) { hartId => vexRiscvConfig( @@ -135,7 +135,7 @@ object VexRiscvLitexSmpDevClusterGen extends App { ) def dutGen = { - val toplevel = VexRiscvLitexSmpDevCluster( + val toplevel = VexRiscvLitexSmpMpCluster( p = parameter, debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")) ) @@ -144,13 +144,13 @@ object VexRiscvLitexSmpDevClusterGen extends App { val genConfig = SpinalConfig().addStandardMemBlackboxing(blackboxByteEnables) // genConfig.generateVerilog(Bench.compressIo(dutGen)) - genConfig.generateVerilog(dutGen.setDefinitionName(s"VexRiscvLitexSmpDevCluster_${cpuCount}c")) + genConfig.generateVerilog(dutGen.setDefinitionName(s"VexRiscvLitexSmpMpCluster_${cpuCount}c")) } } -object VexRiscvLitexSmpDevClusterOpenSbi extends App{ +object VexRiscvLitexSmpMpClusterOpenSbi extends App{ import spinal.core.sim._ val simConfig = SimConfig @@ -159,7 +159,7 @@ object VexRiscvLitexSmpDevClusterOpenSbi extends App{ val cpuCount = 4 - def parameter = VexRiscvLitexSmpDevClusterParameter( + def parameter = VexRiscvLitexSmpMpClusterParameter( cluster = 
VexRiscvSmpClusterParameter( cpuConfigs = List.tabulate(cpuCount) { hartId => vexRiscvConfig( @@ -174,7 +174,7 @@ object VexRiscvLitexSmpDevClusterOpenSbi extends App{ ) def dutGen = { - val top = VexRiscvLitexSmpDevCluster( + val top = VexRiscvLitexSmpMpCluster( p = parameter, debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")) ) From 73f88e47cb83d71c2bc29669054d165652eb8fcc Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Wed, 3 Jun 2020 16:29:21 +0200 Subject: [PATCH 76/91] Fix BmbToLitexDram coherency --- src/main/scala/vexriscv/demo/smp/Misc.scala | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/main/scala/vexriscv/demo/smp/Misc.scala b/src/main/scala/vexriscv/demo/smp/Misc.scala index a7965a4..b192a9c 100644 --- a/src/main/scala/vexriscv/demo/smp/Misc.scala +++ b/src/main/scala/vexriscv/demo/smp/Misc.scala @@ -205,10 +205,17 @@ case class BmbToLiteDram(bmbParameter : BmbParameter, val rspContext = cmdContext.queue(rdataFifoSize) val rdataFifo = io.output.rdata.queueLowLatency(rdataFifoSize, latency = 1) + val writeTocken = CounterUpDown( + stateCount = rdataFifoSize*2, + incWhen = io.output.wdata.fire, + decWhen = rspContext.fire && rspContext.isWrite + ) + val canRspWrite = writeTocken =/= 0 + val canRspRead = CombInit(rdataFifo.valid) rdataFifo.ready := unburstified.rsp.fire && !rspContext.isWrite rspContext.ready := unburstified.rsp.fire - unburstified.rsp.valid := rspContext.valid && (rspContext.isWrite || rdataFifo.valid) + unburstified.rsp.valid := rspContext.valid && (rspContext.isWrite ? canRspWrite | canRspRead) unburstified.rsp.setSuccess() unburstified.rsp.last := True unburstified.rsp.source := rspContext.source From 89c13bedbdb90bfcbfd29cddbf5179715ddbecc3 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Wed, 3 Jun 2020 16:31:34 +0200 Subject: [PATCH 77/91] Fix litex smp cluster sim --- .../demo/smp/VexRiscvSmpCluster.scala | 2 +- .../demo/smp/VexRiscvSmpLitexCluster.scala | 4 +- .../demo/smp/VexRiscvSmpLitexMpCluster.scala | 76 +++++++++++++------ 3 files changed, 55 insertions(+), 27 deletions(-) diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala index 422b1e2..97f1876 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -466,7 +466,7 @@ object VexRiscvSmpClusterOpenSbi extends App{ simConfig.allOptimisation simConfig.addSimulatorFlag("--threads 1") - val cpuCount = 4 + val cpuCount = 2 val withStall = false def gen = { diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala index acf8e4e..ea5cd39 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala @@ -155,7 +155,7 @@ object VexRiscvLitexSmpClusterOpenSbi extends App{ simConfig.withWave simConfig.allOptimisation - val cpuCount = 8 + val cpuCount = 2 def parameter = VexRiscvLitexSmpClusterParameter( cluster = VexRiscvSmpClusterParameter( @@ -218,7 +218,7 @@ object VexRiscvLitexSmpClusterOpenSbi extends App{ dut.io.externalInterrupts #= 0 dut.io.externalSupervisorInterrupts #= 0 - dut.clockDomain.onSamplings{ + dut.clockDomain.onFallingEdges{ if(dut.io.peripheral.CYC.toBoolean){ (dut.io.peripheral.ADR.toLong << 2) match { case 0xF0000000l => print(dut.io.peripheral.DAT_MOSI.toLong.toChar) diff --git 
a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexMpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexMpCluster.scala index 6883343..1342b70 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexMpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexMpCluster.scala @@ -115,7 +115,16 @@ case class VexRiscvLitexSmpMpCluster(p : VexRiscvLitexSmpMpClusterParameter, val dMemBridge = for(id <- 0 until cpuCount) yield { io.dMem(id).fromBmb(dBusDemux.io.outputs(id), wdataFifoSize = 32, rdataFifoSize = 32) } - +// +// io.dMem.foreach(_.cmd.valid.addAttribute("""mark_debug = "true"""")) +// io.dMem.foreach(_.cmd.ready.addAttribute("""mark_debug = "true"""")) +// io.iMem.foreach(_.cmd.valid.addAttribute("""mark_debug = "true"""")) +// io.iMem.foreach(_.cmd.ready.addAttribute("""mark_debug = "true"""")) +// +// cluster.io.dMem.cmd.valid.addAttribute("""mark_debug = "true"""") +// cluster.io.dMem.cmd.ready.addAttribute("""mark_debug = "true"""") +// cluster.io.dMem.rsp.valid.addAttribute("""mark_debug = "true"""") +// cluster.io.dMem.rsp.ready.addAttribute("""mark_debug = "true"""") } object VexRiscvLitexSmpMpClusterGen extends App { @@ -155,9 +164,10 @@ object VexRiscvLitexSmpMpClusterOpenSbi extends App{ val simConfig = SimConfig simConfig.withWave + simConfig.withFstWave simConfig.allOptimisation - val cpuCount = 4 + val cpuCount = 2 def parameter = VexRiscvLitexSmpMpClusterParameter( cluster = VexRiscvSmpClusterParameter( @@ -221,41 +231,59 @@ object VexRiscvLitexSmpMpClusterOpenSbi extends App{ dut.io.externalInterrupts #= 0 dut.io.externalSupervisorInterrupts #= 0 - dut.clockDomain.onSamplings{ +// val stdin = mutable.Queue[Byte]() +// def stdInPush(str : String) = stdin ++= str.toCharArray.map(_.toByte) +// fork{ +// sleep(4000*1000000l) +// stdInPush("root\n") +// sleep(1000*1000000l) +// stdInPush("ping localhost -i 0.01 > /dev/null &\n") +// stdInPush("ping localhost -i 0.01 > /dev/null &\n") +// stdInPush("ping localhost -i 0.01 > /dev/null &\n") +// stdInPush("ping localhost -i 0.01 > /dev/null &\n") +// sleep(500*1000000l) +// while(true){ +// sleep(500*1000000l) +// stdInPush("uptime\n") +// printf("\n** uptime **") +// } +// } + dut.clockDomain.onFallingEdges{ if(dut.io.peripheral.CYC.toBoolean){ (dut.io.peripheral.ADR.toLong << 2) match { case 0xF0000000l => print(dut.io.peripheral.DAT_MOSI.toLong.toChar) case 0xF0000004l => dut.io.peripheral.DAT_MISO #= (if(System.in.available() != 0) System.in.read() else 0xFFFFFFFFl) - case _ => - } +// case 0xF0000004l => { +// val c = if(stdin.nonEmpty) { +// stdin.dequeue().toInt & 0xFF +// } else { +// 0xFFFFFFFFl +// } +// dut.io.peripheral.DAT_MISO #= c +// } +// case _ => +// } // println(f"${dut.io.peripheral.ADR.toLong}%x") } } -// fork{ -// disableSimWave() -// val atMs = 3790 -// val durationMs = 5 -// sleep(atMs*1000000l) -// enableSimWave() -// println("** enableSimWave **") -// sleep(durationMs*1000000l) -// println("** disableSimWave **") -// while(true) { -// disableSimWave() -// sleep(100000 * 10) -// enableSimWave() -// sleep( 100 * 10) -// } -// // simSuccess() -// } - fork{ + val at = 0 + val duration = 0 + while(simTime() < at*1000000l) { + disableSimWave() + sleep(100000 * 10) + enableSimWave() + sleep( 200 * 10) + } + println("\n\n********************") + sleep(duration*1000000l) + println("********************\n\n") while(true) { disableSimWave() sleep(100000 * 10) enableSimWave() - sleep( 100 * 10) + sleep( 400 * 10) } } } From 97c2dc270c85b4cc4c984db254507a9c87c4c6c6 Mon Sep 17 
00:00:00 2001 From: Dolu1990 Date: Thu, 4 Jun 2020 10:11:30 +0200 Subject: [PATCH 78/91] Fix typo --- .../demo/smp/VexRiscvSmpLitexMpCluster.scala | 191 +++++++++--------- 1 file changed, 96 insertions(+), 95 deletions(-) diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexMpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexMpCluster.scala index 1342b70..ceb5a36 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexMpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexMpCluster.scala @@ -160,76 +160,76 @@ object VexRiscvLitexSmpMpClusterGen extends App { object VexRiscvLitexSmpMpClusterOpenSbi extends App{ - import spinal.core.sim._ + import spinal.core.sim._ - val simConfig = SimConfig - simConfig.withWave - simConfig.withFstWave - simConfig.allOptimisation + val simConfig = SimConfig + simConfig.withWave + simConfig.withFstWave + simConfig.allOptimisation - val cpuCount = 2 + val cpuCount = 2 - def parameter = VexRiscvLitexSmpMpClusterParameter( - cluster = VexRiscvSmpClusterParameter( - cpuConfigs = List.tabulate(cpuCount) { hartId => - vexRiscvConfig( - hartId = hartId, - ioRange = address => address(31 downto 28) === 0xF, - resetVector = 0x80000000l - ) - } - ), - liteDram = LiteDramNativeParameter(addressWidth = 32, dataWidth = 128), - liteDramMapping = SizeMapping(0x80000000l, 0x70000000l) + def parameter = VexRiscvLitexSmpMpClusterParameter( + cluster = VexRiscvSmpClusterParameter( + cpuConfigs = List.tabulate(cpuCount) { hartId => + vexRiscvConfig( + hartId = hartId, + ioRange = address => address(31 downto 28) === 0xF, + resetVector = 0x80000000l + ) + } + ), + liteDram = LiteDramNativeParameter(addressWidth = 32, dataWidth = 128), + liteDramMapping = SizeMapping(0x80000000l, 0x70000000l) + ) + + def dutGen = { + val top = VexRiscvLitexSmpMpCluster( + p = parameter, + debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")) ) + top.rework{ + top.io.clint.setAsDirectionLess.allowDirectionLessIo + top.io.peripheral.setAsDirectionLess.allowDirectionLessIo.simPublic() - def dutGen = { - val top = VexRiscvLitexSmpMpCluster( - p = parameter, - debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")) - ) - top.rework{ - top.io.clint.setAsDirectionLess.allowDirectionLessIo - top.io.peripheral.setAsDirectionLess.allowDirectionLessIo.simPublic() - - val hit = (top.io.peripheral.ADR <<2 >= 0xF0010000l && top.io.peripheral.ADR<<2 < 0xF0020000l) - top.io.clint.CYC := top.io.peripheral.CYC && hit - top.io.clint.STB := top.io.peripheral.STB - top.io.clint.WE := top.io.peripheral.WE - top.io.clint.ADR := top.io.peripheral.ADR.resized - top.io.clint.DAT_MOSI := top.io.peripheral.DAT_MOSI - top.io.peripheral.DAT_MISO := top.io.clint.DAT_MISO - top.io.peripheral.ACK := top.io.peripheral.CYC && (!hit || top.io.clint.ACK) - top.io.peripheral.ERR := False + val hit = (top.io.peripheral.ADR <<2 >= 0xF0010000l && top.io.peripheral.ADR<<2 < 0xF0020000l) + top.io.clint.CYC := top.io.peripheral.CYC && hit + top.io.clint.STB := top.io.peripheral.STB + top.io.clint.WE := top.io.peripheral.WE + top.io.clint.ADR := top.io.peripheral.ADR.resized + top.io.clint.DAT_MOSI := top.io.peripheral.DAT_MOSI + top.io.peripheral.DAT_MISO := top.io.clint.DAT_MISO + top.io.peripheral.ACK := top.io.peripheral.CYC && (!hit || top.io.clint.ACK) + top.io.peripheral.ERR := False // top.dMemBridge.unburstified.cmd.simPublic() - } - top } - simConfig.compile(dutGen).doSimUntilVoid(seed = 42){dut => - 
dut.clockDomain.forkStimulus(10) - fork { - dut.debugClockDomain.resetSim #= false - sleep (0) - dut.debugClockDomain.resetSim #= true - sleep (10) - dut.debugClockDomain.resetSim #= false - } + top + } + simConfig.compile(dutGen).doSimUntilVoid(seed = 42){dut => + dut.clockDomain.forkStimulus(10) + fork { + dut.debugClockDomain.resetSim #= false + sleep (0) + dut.debugClockDomain.resetSim #= true + sleep (10) + dut.debugClockDomain.resetSim #= false + } - val ram = SparseMemory() - ram.loadBin(0x80000000l, "../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_jump.bin") - ram.loadBin(0xC0000000l, "../buildroot/output/images/Image") - ram.loadBin(0xC1000000l, "../buildroot/output/images/dtb") - ram.loadBin(0xC2000000l, "../buildroot/output/images/rootfs.cpio") + val ram = SparseMemory() + ram.loadBin(0x80000000l, "../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_jump.bin") + ram.loadBin(0xC0000000l, "../buildroot/output/images/Image") + ram.loadBin(0xC1000000l, "../buildroot/output/images/dtb") + ram.loadBin(0xC2000000l, "../buildroot/output/images/rootfs.cpio") - for(id <- 0 until cpuCount) { - dut.io.iMem(id).simSlave(ram, dut.clockDomain) - dut.io.dMem(id).simSlave(ram, dut.clockDomain) - } + for(id <- 0 until cpuCount) { + dut.io.iMem(id).simSlave(ram, dut.clockDomain) + dut.io.dMem(id).simSlave(ram, dut.clockDomain) + } - dut.io.externalInterrupts #= 0 - dut.io.externalSupervisorInterrupts #= 0 + dut.io.externalInterrupts #= 0 + dut.io.externalSupervisorInterrupts #= 0 // val stdin = mutable.Queue[Byte]() // def stdInPush(str : String) = stdin ++= str.toCharArray.map(_.toByte) @@ -248,43 +248,44 @@ object VexRiscvLitexSmpMpClusterOpenSbi extends App{ // printf("\n** uptime **") // } // } - dut.clockDomain.onFallingEdges{ - if(dut.io.peripheral.CYC.toBoolean){ - (dut.io.peripheral.ADR.toLong << 2) match { - case 0xF0000000l => print(dut.io.peripheral.DAT_MOSI.toLong.toChar) - case 0xF0000004l => dut.io.peripheral.DAT_MISO #= (if(System.in.available() != 0) System.in.read() else 0xFFFFFFFFl) -// case 0xF0000004l => { -// val c = if(stdin.nonEmpty) { -// stdin.dequeue().toInt & 0xFF -// } else { -// 0xFFFFFFFFl -// } -// dut.io.peripheral.DAT_MISO #= c -// } -// case _ => -// } -// println(f"${dut.io.peripheral.ADR.toLong}%x") - } - } - - fork{ - val at = 0 - val duration = 0 - while(simTime() < at*1000000l) { - disableSimWave() - sleep(100000 * 10) - enableSimWave() - sleep( 200 * 10) - } - println("\n\n********************") - sleep(duration*1000000l) - println("********************\n\n") - while(true) { - disableSimWave() - sleep(100000 * 10) - enableSimWave() - sleep( 400 * 10) + dut.clockDomain.onFallingEdges { + if (dut.io.peripheral.CYC.toBoolean) { + (dut.io.peripheral.ADR.toLong << 2) match { + case 0xF0000000l => print(dut.io.peripheral.DAT_MOSI.toLong.toChar) + case 0xF0000004l => dut.io.peripheral.DAT_MISO #= (if (System.in.available() != 0) System.in.read() else 0xFFFFFFFFl) + // case 0xF0000004l => { + // val c = if(stdin.nonEmpty) { + // stdin.dequeue().toInt & 0xFF + // } else { + // 0xFFFFFFFFl + // } + // dut.io.peripheral.DAT_MISO #= c + // } + // case _ => + // } + // println(f"${dut.io.peripheral.ADR.toLong}%x") } } } - } \ No newline at end of file + + fork{ + val at = 0 + val duration = 0 + while(simTime() < at*1000000l) { + disableSimWave() + sleep(100000 * 10) + enableSimWave() + sleep( 200 * 10) + } + println("\n\n********************") + sleep(duration*1000000l) + println("********************\n\n") + while(true) { + disableSimWave() + 
sleep(100000 * 10) + enableSimWave() + sleep( 400 * 10) + } + } + } +} \ No newline at end of file From 06680464078f0079fdc33434bc32fc70b12b3bcc Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Fri, 5 Jun 2020 10:40:51 +0200 Subject: [PATCH 79/91] More smp cluster profiling --- .../demo/smp/VexRiscvSmpCluster.scala | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala index 97f1876..0e237c6 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -497,9 +497,11 @@ object VexRiscvSmpClusterOpenSbi extends App{ ram.memory.loadBin(0xC2000000l, "../buildroot/output/images/rootfs.cpio") import spinal.core.sim._ - var iMemReadBytes, dMemReadBytes, dMemWriteBytes, iMemSequencial,iMemRequests = 0l + var iMemReadBytes, dMemReadBytes, dMemWriteBytes, iMemSequencial,iMemRequests, iMemPrefetchHit = 0l var reportTimer = 0 var reportCycle = 0 + val iMemFetchDelta = mutable.HashMap[Long, Long]() + var iMemFetchDeltaSorted : Seq[(Long, Long)] = null var dMemWrites, dMemWritesCached = 0l val dMemWriteCacheCtx = List(4,8,16,32,64).map(bytes => new { var counter = 0l @@ -512,6 +514,7 @@ object VexRiscvSmpClusterOpenSbi extends App{ val iMemCtx = Array.tabulate(cpuCount)(i => new { var sequencialPrediction = 0l val cache = dut.cpus(i).core.children.find(_.isInstanceOf[InstructionCache]).head.asInstanceOf[InstructionCache].io.cpu.decode + var lastAddress = 0l }) dut.clockDomain.onSamplings{ for(i <- 0 until cpuCount; iMem = dut.io.iMems(i); ctx = iMemCtx(i)){ @@ -527,7 +530,6 @@ object VexRiscvSmpClusterOpenSbi extends App{ val mask = ~(length-1) if(ctx.cache.cacheMiss.toBoolean) { iMemReadBytes += length - iMemRequests += 1 if ((address & mask) == (ctx.sequencialPrediction & mask)) { iMemSequencial += 1 } @@ -536,6 +538,18 @@ object VexRiscvSmpClusterOpenSbi extends App{ ctx.sequencialPrediction = address + length } } + + if(iMem.cmd.valid.toBoolean && iMem.cmd.ready.toBoolean){ + val address = iMem.cmd.address.toLong + iMemRequests += 1 + if(iMemCtx(i).lastAddress + ctx.cache.p.bytePerLine == address){ + iMemPrefetchHit += 1 + } + val delta = address-iMemCtx(i).lastAddress + iMemFetchDelta(delta) = iMemFetchDelta.getOrElse(delta, 0l) + 1l + if(iMemRequests % 1000 == 999) iMemFetchDeltaSorted = iMemFetchDelta.toSeq.sortBy(_._1) + iMemCtx(i).lastAddress = address + } } if(dut.io.dMem.cmd.valid.toBoolean && dut.io.dMem.cmd.ready.toBoolean){ if(dut.io.dMem.cmd.opcode.toInt == Bmb.Cmd.Opcode.WRITE){ @@ -561,7 +575,7 @@ object VexRiscvSmpClusterOpenSbi extends App{ // println(f"\n** c=${reportCycle} ir=${iMemReadBytes*1e-6}%5.2f dr=${dMemReadBytes*1e-6}%5.2f dw=${dMemWriteBytes*1e-6}%5.2f **\n") - csv.write(s"$reportCycle,$iMemReadBytes,$dMemReadBytes,$dMemWriteBytes,$iMemRequests,$iMemSequencial,$dMemWrites,${dMemWriteCacheCtx.map(_.counter).mkString(",")}\n") + csv.write(s"$reportCycle,$iMemReadBytes,$dMemReadBytes,$dMemWriteBytes,$iMemRequests,$iMemSequencial,$dMemWrites,${dMemWriteCacheCtx.map(_.counter).mkString(",")},$iMemPrefetchHit\n") csv.flush() reportCycle = 0 iMemReadBytes = 0 @@ -570,6 +584,7 @@ object VexRiscvSmpClusterOpenSbi extends App{ iMemRequests = 0 iMemSequencial = 0 dMemWrites = 0 + iMemPrefetchHit = 0 for(ctx <- dMemWriteCacheCtx) ctx.counter = 0 } } From 3dafe8708b8acb70b21623671aa10726803f4992 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Fri, 5 Jun 2020 10:34:57 
+0200 Subject: [PATCH 80/91] Cfu update --- .../demo/GenSmallAndProductiveCfu.scala | 2 +- .../scala/vexriscv/plugin/CfuPlugin.scala | 71 ++++++++++++++++--- 2 files changed, 62 insertions(+), 11 deletions(-) diff --git a/src/main/scala/vexriscv/demo/GenSmallAndProductiveCfu.scala b/src/main/scala/vexriscv/demo/GenSmallAndProductiveCfu.scala index c500452..960242f 100644 --- a/src/main/scala/vexriscv/demo/GenSmallAndProductiveCfu.scala +++ b/src/main/scala/vexriscv/demo/GenSmallAndProductiveCfu.scala @@ -53,7 +53,7 @@ object GenSmallAndProductiveCfu extends App{ new CfuPlugin( stageCount = 1, allowZeroLatency = true, - encoding = M"000000-------------------0001011", +// encoding = M"000000-------------------0001011", busParameter = CfuBusParameter( CFU_VERSION = 0, CFU_INTERFACE_ID_W = 0, diff --git a/src/main/scala/vexriscv/plugin/CfuPlugin.scala b/src/main/scala/vexriscv/plugin/CfuPlugin.scala index d5aaf1c..de0ae91 100644 --- a/src/main/scala/vexriscv/plugin/CfuPlugin.scala +++ b/src/main/scala/vexriscv/plugin/CfuPlugin.scala @@ -5,6 +5,7 @@ import spinal.core._ import spinal.lib._ import spinal.lib.bus.bmb.WeakConnector import spinal.lib.bus.misc.{AddressMapping, DefaultMapping} +import vexriscv.Riscv.IMM case class CfuPluginParameter( CFU_VERSION : Int, @@ -78,12 +79,20 @@ case class CfuBus(p : CfuBusParameter) extends Bundle with IMasterSlave{ } } +object CfuPlugin{ + object Input2Kind extends SpinalEnum{ + val RS, IMM_I = newElement() + } +} +case class CfuPluginEncoding(instruction : MaskedLiteral, + functionId : List[Range], + input2Kind : CfuPlugin.Input2Kind.E) class CfuPlugin( val stageCount : Int, val allowZeroLatency : Boolean, - val encoding : MaskedLiteral, - val busParameter : CfuBusParameter) extends Plugin[VexRiscv]{ + val busParameter : CfuBusParameter, + val encodings : List[CfuPluginEncoding] = null) extends Plugin[VexRiscv]{ def p = busParameter assert(p.CFU_INPUTS <= 2) @@ -99,7 +108,8 @@ class CfuPlugin( val stageCount : Int, val CFU_ENABLE = new Stageable(Bool()).setCompositeName(this, "CFU_ENABLE") val CFU_IN_FLIGHT = new Stageable(Bool()).setCompositeName(this, "CFU_IN_FLIGHT") - + val CFU_ENCODING = new Stageable(UInt(log2Up(encodings.size) bits)).setCompositeName(this, "CFU_ENCODING") + val CFU_INPUT_2_KIND = new Stageable(CfuPlugin.Input2Kind()).setCompositeName(this, "CFU_ENCODING") override def setup(pipeline: VexRiscv): Unit = { import pipeline._ @@ -111,17 +121,53 @@ class CfuPlugin( val stageCount : Int, val decoderService = pipeline.service(classOf[DecoderService]) decoderService.addDefault(CFU_ENABLE, False) - //custom-0 - decoderService.add(List( - encoding -> List( + for((encoding, id) <- encodings.zipWithIndex){ + var actions = List( CFU_ENABLE -> True, REGFILE_WRITE_VALID -> True, BYPASSABLE_EXECUTE_STAGE -> Bool(stageCount == 0), BYPASSABLE_MEMORY_STAGE -> Bool(stageCount <= 1), RS1_USE -> True, - RS2_USE -> True + CFU_ENCODING -> id, + CFU_INPUT_2_KIND -> encoding.input2Kind() ) - )) + + encoding.input2Kind match { + case CfuPlugin.Input2Kind.RS => + actions :+= RS2_USE -> True + case CfuPlugin.Input2Kind.IMM_I => + } + + decoderService.add( + key = encoding.instruction, + values = actions + ) + } + +// decoderService.add(List( +// //custom-0 +// M"-------------------------0001011" -> List( +// CFU_ENABLE -> True, +// REGFILE_WRITE_VALID -> True, +// BYPASSABLE_EXECUTE_STAGE -> Bool(stageCount == 0), +// BYPASSABLE_MEMORY_STAGE -> Bool(stageCount <= 1), +// RS1_USE -> True, +// RS2_USE -> True, +// CFU_IMM -> False +// ), +// +// //custom-1 +// 
M"-------------------------0101011" -> List( +// CFU_ENABLE -> True, +// REGFILE_WRITE_VALID -> True, +// BYPASSABLE_EXECUTE_STAGE -> Bool(stageCount == 0), +// BYPASSABLE_MEMORY_STAGE -> Bool(stageCount <= 1), +// RS1_USE -> True, +// CFU_IMM -> True +// ) +// )) + + } override def build(pipeline: VexRiscv): Unit = { @@ -139,11 +185,16 @@ class CfuPlugin( val stageCount : Int, bus.cmd.valid := (schedule || hold) && !fired arbitration.haltItself setWhen(bus.cmd.valid && !bus.cmd.ready) - bus.cmd.function_id := U(input(INSTRUCTION)(14 downto 12)).resized +// bus.cmd.function_id := U(input(INSTRUCTION)(14 downto 12)).resized + val functionsIds = encodings.map(e => U(Cat(e.functionId.map(r => input(INSTRUCTION)(r))), busParameter.CFU_FUNCTION_ID_W bits)) + bus.cmd.function_id := functionsIds.read(input(CFU_ENCODING)) bus.cmd.reorder_id := 0 bus.cmd.request_id := 0 if(p.CFU_INPUTS >= 1) bus.cmd.inputs(0) := input(RS1) - if(p.CFU_INPUTS >= 2) bus.cmd.inputs(1) := input(RS2) + if(p.CFU_INPUTS >= 2) bus.cmd.inputs(1) := input(CFU_INPUT_2_KIND).mux( + CfuPlugin.Input2Kind.RS -> input(RS2), + CfuPlugin.Input2Kind.IMM_I -> IMM(input(INSTRUCTION)).i_sext + ) } joinStage plug new Area{ From 71760ea3724ed854a1d831fbd9089f66ef2f41cd Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Fri, 5 Jun 2020 10:35:50 +0200 Subject: [PATCH 81/91] CsrPlugin now support utime csr to avoid emulation --- src/main/scala/vexriscv/Riscv.scala | 7 +++++-- src/main/scala/vexriscv/plugin/CsrPlugin.scala | 8 ++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/main/scala/vexriscv/Riscv.scala b/src/main/scala/vexriscv/Riscv.scala index 90a40c5..ee9be3d 100644 --- a/src/main/scala/vexriscv/Riscv.scala +++ b/src/main/scala/vexriscv/Riscv.scala @@ -159,7 +159,10 @@ object Riscv{ - def UCYCLE = 0xC00 // UR Machine ucycle counter. - def UCYCLEH = 0xC80 + def UCYCLE = 0xC00 // UR Machine ucycle counter. 
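// Editorial sketch, not part of the patch: with the encodings parameter introduced in the
// CfuPlugin change above, a CfuPlugin instance now describes each accepted instruction
// pattern itself. A hypothetical custom-0 R-type configuration, where funct3 selects the
// CFU function and the second operand comes from rs2 (cfuBusParameter is an assumed,
// pre-built CfuBusParameter):
new CfuPlugin(
  stageCount = 1,
  allowZeroLatency = true,
  busParameter = cfuBusParameter,
  encodings = List(
    CfuPluginEncoding(
      instruction = M"-------------------------0001011",
      functionId  = List(14 downto 12),
      input2Kind  = CfuPlugin.Input2Kind.RS
    )
  )
)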
+ def UCYCLEH = 0xC80 + def UTIME = 0xC01 // rdtime + def UTIMEH = 0xC81 + } } diff --git a/src/main/scala/vexriscv/plugin/CsrPlugin.scala b/src/main/scala/vexriscv/plugin/CsrPlugin.scala index ebff1e0..9dfad38 100644 --- a/src/main/scala/vexriscv/plugin/CsrPlugin.scala +++ b/src/main/scala/vexriscv/plugin/CsrPlugin.scala @@ -66,6 +66,7 @@ case class CsrPluginConfig( scycleAccess : CsrAccess = CsrAccess.NONE, sinstretAccess : CsrAccess = CsrAccess.NONE, satpAccess : CsrAccess = CsrAccess.NONE, + utimeAccess :CsrAccess = CsrAccess.NONE, medelegAccess : CsrAccess = CsrAccess.NONE, midelegAccess : CsrAccess = CsrAccess.NONE, withExternalMhartid : Boolean = false, @@ -390,6 +391,7 @@ class CsrPlugin(val config: CsrPluginConfig) extends Plugin[VexRiscv] with Excep var thirdPartyWake : Bool = null var inWfi : Bool = null var externalMhartId : UInt = null + var utime : UInt = null override def askWake(): Unit = thirdPartyWake := True @@ -520,6 +522,7 @@ class CsrPlugin(val config: CsrPluginConfig) extends Plugin[VexRiscv] with Excep pipeline.update(MPP, UInt(2 bits)) if(withExternalMhartid) externalMhartId = in UInt(mhartidWidth bits) + if(utimeAccess != CsrAccess.NONE) utime = in UInt(64 bits) setName("utime") } def inhibateInterrupts() : Unit = allowInterrupts := False @@ -634,6 +637,11 @@ class CsrPlugin(val config: CsrPluginConfig) extends Plugin[VexRiscv] with Excep ucycleAccess(CSR.UCYCLE, mcycle(31 downto 0)) ucycleAccess(CSR.UCYCLEH, mcycle(63 downto 32)) + if(utimeAccess != CsrAccess.NONE) { + utimeAccess(CSR.UTIME, utime(31 downto 0)) + utimeAccess(CSR.UTIMEH, utime(63 downto 32)) + } + pipeline(MPP) := mstatus.MPP } From d6455817e7e34bb0f7b43a83bcb8b39b04eb6858 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Fri, 5 Jun 2020 10:43:00 +0200 Subject: [PATCH 82/91] smp cluster now have 2w*4KB of d$ , no more rdtime emulation --- src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala index 0e237c6..5c1a34b 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -9,7 +9,7 @@ import spinal.lib.bus.bmb.{Bmb, BmbArbiter, BmbDecoder, BmbExclusiveMonitor, Bmb import spinal.lib.com.jtag.Jtag import spinal.lib.com.jtag.sim.JtagTcp import vexriscv.ip.{DataCacheAck, DataCacheConfig, DataCacheMemBus, InstructionCache, InstructionCacheConfig} -import vexriscv.plugin.{BranchPlugin, CsrPlugin, CsrPluginConfig, DBusCachedPlugin, DBusSimplePlugin, DYNAMIC_TARGET, DebugPlugin, DecoderSimplePlugin, FullBarrelShifterPlugin, HazardSimplePlugin, IBusCachedPlugin, IBusSimplePlugin, IntAluPlugin, MmuPlugin, MmuPortConfig, MulDivIterativePlugin, MulPlugin, RegFilePlugin, STATIC, SrcPlugin, YamlPlugin} +import vexriscv.plugin.{BranchPlugin, CsrAccess, CsrPlugin, CsrPluginConfig, DBusCachedPlugin, DBusSimplePlugin, DYNAMIC_TARGET, DebugPlugin, DecoderSimplePlugin, FullBarrelShifterPlugin, HazardSimplePlugin, IBusCachedPlugin, IBusSimplePlugin, IntAluPlugin, MmuPlugin, MmuPortConfig, MulDivIterativePlugin, MulPlugin, RegFilePlugin, STATIC, SrcPlugin, YamlPlugin} import vexriscv.{Riscv, VexRiscv, VexRiscvConfig, plugin} import scala.collection.mutable @@ -160,9 +160,9 @@ object VexRiscvSmpClusterGen { dBusRspSlavePipe = true, relaxedMemoryTranslationRegister = true, config = new DataCacheConfig( - cacheSize = 4096*1, + cacheSize = 4096*2, 
bytePerLine = 64, - wayCount = 1, + wayCount = 2, addressWidth = 32, cpuDataWidth = 32, memDataWidth = dBusWidth, @@ -213,7 +213,7 @@ object VexRiscvSmpClusterGen { mulUnrollFactor = 32, divUnrollFactor = 1 ), - new CsrPlugin(CsrPluginConfig.openSbi(mhartid = hartId, misa = Riscv.misaToInt("imas"))), + new CsrPlugin(CsrPluginConfig.openSbi(mhartid = hartId, misa = Riscv.misaToInt("imas")).copy(utimeAccess = CsrAccess.READ_ONLY)), new BranchPlugin( earlyBranch = false, catchAddressMisaligned = true, From 760d2f74d0f2431827b29cacdb7bd440d6dfc48f Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Fri, 5 Jun 2020 13:31:24 +0200 Subject: [PATCH 83/91] Update litex cluster to implement utime --- src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala | 5 +++++ .../scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala | 1 + .../scala/vexriscv/demo/smp/VexRiscvSmpLitexMpCluster.scala | 2 ++ 3 files changed, 8 insertions(+) diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala index 5c1a34b..de29409 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -40,6 +40,7 @@ case class VexRiscvSmpCluster(p : VexRiscvSmpClusterParameter, val externalSupervisorInterrupts = in Bits(p.cpuConfigs.size bits) val jtag = slave(Jtag()) val debugReset = out Bool() + val time = in UInt(64 bits) } val cpus = for((cpuConfig, cpuId) <- p.cpuConfigs.zipWithIndex) yield new Area{ @@ -61,6 +62,7 @@ case class VexRiscvSmpCluster(p : VexRiscvSmpClusterParameter, plugin.externalInterrupt := io.externalInterrupts(cpuId) plugin.timerInterrupt := io.timerInterrupts(cpuId) if (plugin.config.supervisorGen) plugin.externalInterruptS := io.externalSupervisorInterrupts(cpuId) + if (plugin.utime != null) plugin.utime := io.time } case plugin: DebugPlugin => debugClockDomain{ io.debugReset := RegNext(plugin.io.resetOut) @@ -517,6 +519,9 @@ object VexRiscvSmpClusterOpenSbi extends App{ var lastAddress = 0l }) dut.clockDomain.onSamplings{ + dut.io.time #= simTime()/10 + + for(i <- 0 until cpuCount; iMem = dut.io.iMems(i); ctx = iMemCtx(i)){ // if(iMem.cmd.valid.toBoolean && iMem.cmd.ready.toBoolean){ // val length = iMem.cmd.length.toInt + 1 diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala index ea5cd39..9466cfe 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala @@ -57,6 +57,7 @@ case class VexRiscvLitexSmpCluster(p : VexRiscvLitexSmpClusterParameter, cluster.io.debugReset <> io.debugReset cluster.io.timerInterrupts <> B(clint.harts.map(_.timerInterrupt)) cluster.io.softwareInterrupts <> B(clint.harts.map(_.softwareInterrupt)) + cluster.io.time := clint.time val dBusDecoder = BmbDecoderOutOfOrder( p = cluster.io.dMem.p, diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexMpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexMpCluster.scala index ceb5a36..0f2af9b 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexMpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexMpCluster.scala @@ -58,6 +58,7 @@ case class VexRiscvLitexSmpMpCluster(p : VexRiscvLitexSmpMpClusterParameter, cluster.io.debugReset <> io.debugReset cluster.io.timerInterrupts <> B(clint.harts.map(_.timerInterrupt)) cluster.io.softwareInterrupts <> B(clint.harts.map(_.softwareInterrupt)) 
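+    // (editor's note) The CLINT's 64-bit time counter feeds the cluster's io.time
+    // input, which each CsrPlugin republishes as the read-only utime/utimeh CSRs
+    // introduced in patch 81, so rdtime no longer has to be trapped and emulated.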
+ cluster.io.time := clint.time val dBusDecoder = BmbDecoderOutOfOrder( p = cluster.io.dMem.p, @@ -253,6 +254,7 @@ object VexRiscvLitexSmpMpClusterOpenSbi extends App{ (dut.io.peripheral.ADR.toLong << 2) match { case 0xF0000000l => print(dut.io.peripheral.DAT_MOSI.toLong.toChar) case 0xF0000004l => dut.io.peripheral.DAT_MISO #= (if (System.in.available() != 0) System.in.read() else 0xFFFFFFFFl) + case _ => // case 0xF0000004l => { // val c = if(stdin.nonEmpty) { // stdin.dequeue().toInt & 0xFF From 1f9fce638806fa665610466dcfa84d16213ea761 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Sat, 6 Jun 2020 22:12:32 +0200 Subject: [PATCH 84/91] Fix d$ uncached writes exception handeling --- src/main/scala/vexriscv/ip/DataCache.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/vexriscv/ip/DataCache.scala b/src/main/scala/vexriscv/ip/DataCache.scala index 82fa3af..8d02a40 100644 --- a/src/main/scala/vexriscv/ip/DataCache.scala +++ b/src/main/scala/vexriscv/ip/DataCache.scala @@ -983,7 +983,7 @@ class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParam when(bypassCache){ io.cpu.writeBack.data := ioMemRspMuxed - if(catchAccessError) io.cpu.writeBack.accessError := io.mem.rsp.valid && io.mem.rsp.error + if(catchAccessError) io.cpu.writeBack.accessError := !request.wr && pending.last && io.mem.rsp.valid && io.mem.rsp.error } otherwise { io.cpu.writeBack.data := dataMux if(catchAccessError) io.cpu.writeBack.accessError := (waysHits & B(tagsReadRsp.map(_.error))) =/= 0 From cb5597818d10f5c19a394d8ffa62a9f3a12ad374 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Sun, 7 Jun 2020 11:29:07 +0200 Subject: [PATCH 85/91] Fix d$ generation crash --- src/main/scala/vexriscv/ip/DataCache.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/scala/vexriscv/ip/DataCache.scala b/src/main/scala/vexriscv/ip/DataCache.scala index 8d02a40..65f2918 100644 --- a/src/main/scala/vexriscv/ip/DataCache.scala +++ b/src/main/scala/vexriscv/ip/DataCache.scala @@ -983,7 +983,8 @@ class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParam when(bypassCache){ io.cpu.writeBack.data := ioMemRspMuxed - if(catchAccessError) io.cpu.writeBack.accessError := !request.wr && pending.last && io.mem.rsp.valid && io.mem.rsp.error + def isLast = if(pending != null) pending.last else True + if(catchAccessError) io.cpu.writeBack.accessError := !request.wr && isLast && io.mem.rsp.valid && io.mem.rsp.error } otherwise { io.cpu.writeBack.data := dataMux if(catchAccessError) io.cpu.writeBack.accessError := (waysHits & B(tagsReadRsp.map(_.error))) =/= 0 From 2e8a059c77bba4d4136118e42a51347a1da46bd1 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Sun, 7 Jun 2020 11:33:24 +0200 Subject: [PATCH 86/91] Fix travis verilator --- scripts/regression/verilator.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/regression/verilator.mk b/scripts/regression/verilator.mk index 9472858..b97b74f 100644 --- a/scripts/regression/verilator.mk +++ b/scripts/regression/verilator.mk @@ -3,9 +3,9 @@ verilator/configure: rm -rf verilator* - wget https://www.veripool.org/ftp/verilator-4.032.tgz + wget https://www.veripool.org/ftp/verilator-4.034.tgz tar xvzf verilator*.t*gz - mv verilator-4.012 verilator + mv verilator-4.034 verilator verilator/Makefile: verilator/configure cd verilator From b0cd88c4626b0fe218aa76790145b358e86e055e Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Fri, 12 Jun 2020 16:18:41 +0200 Subject: [PATCH 87/91] SmpCluster 
now with proper jtag and plic --- .../demo/smp/VexRiscvSmpCluster.scala | 42 +++---- .../demo/smp/VexRiscvSmpLitexCluster.scala | 71 ++++++++--- .../demo/smp/VexRiscvSmpLitexMpCluster.scala | 110 +++++++++++++++--- 3 files changed, 172 insertions(+), 51 deletions(-) diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala index de29409..ccc1f57 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -6,8 +6,10 @@ import spinal.core.sim.{onSimEnd, simSuccess} import spinal.lib._ import spinal.lib.bus.bmb.sim.BmbMemoryAgent import spinal.lib.bus.bmb.{Bmb, BmbArbiter, BmbDecoder, BmbExclusiveMonitor, BmbInvalidateMonitor, BmbParameter} -import spinal.lib.com.jtag.Jtag +import spinal.lib.bus.misc.SizeMapping +import spinal.lib.com.jtag.{Jtag, JtagTapInstructionCtrl} import spinal.lib.com.jtag.sim.JtagTcp +import spinal.lib.system.debugger.SystemDebuggerConfig import vexriscv.ip.{DataCacheAck, DataCacheConfig, DataCacheMemBus, InstructionCache, InstructionCacheConfig} import vexriscv.plugin.{BranchPlugin, CsrAccess, CsrPlugin, CsrPluginConfig, DBusCachedPlugin, DBusSimplePlugin, DYNAMIC_TARGET, DebugPlugin, DecoderSimplePlugin, FullBarrelShifterPlugin, HazardSimplePlugin, IBusCachedPlugin, IBusSimplePlugin, IntAluPlugin, MmuPlugin, MmuPortConfig, MulDivIterativePlugin, MulPlugin, RegFilePlugin, STATIC, SrcPlugin, YamlPlugin} import vexriscv.{Riscv, VexRiscv, VexRiscvConfig, plugin} @@ -38,21 +40,23 @@ case class VexRiscvSmpCluster(p : VexRiscvSmpClusterParameter, val externalInterrupts = in Bits(p.cpuConfigs.size bits) val softwareInterrupts = in Bits(p.cpuConfigs.size bits) val externalSupervisorInterrupts = in Bits(p.cpuConfigs.size bits) - val jtag = slave(Jtag()) + val debugBus = slave(Bmb(SystemDebuggerConfig().getBmbParameter.copy(addressWidth = 20))) val debugReset = out Bool() val time = in UInt(64 bits) } + io.debugReset := False val cpus = for((cpuConfig, cpuId) <- p.cpuConfigs.zipWithIndex) yield new Area{ var iBus : Bmb = null var dBus : Bmb = null + var debug : Bmb = null cpuConfig.plugins.foreach { case plugin: DebugPlugin => debugClockDomain{ plugin.debugClockDomain = debugClockDomain } case _ => } - if(cpuId == 0) cpuConfig.plugins += new DebugPlugin(debugClockDomain) + cpuConfig.plugins += new DebugPlugin(debugClockDomain) val core = new VexRiscv(cpuConfig) core.plugins.foreach { case plugin: IBusCachedPlugin => iBus = plugin.iBus.toBmb() @@ -65,8 +69,8 @@ case class VexRiscvSmpCluster(p : VexRiscvSmpClusterParameter, if (plugin.utime != null) plugin.utime := io.time } case plugin: DebugPlugin => debugClockDomain{ - io.debugReset := RegNext(plugin.io.resetOut) - io.jtag <> plugin.io.bus.fromJtag() + io.debugReset setWhen(RegNext(plugin.io.resetOut)) + debug = plugin.io.bus.fromBmb() } case _ => } @@ -97,19 +101,18 @@ case class VexRiscvSmpCluster(p : VexRiscvSmpClusterParameter, io.dMem << invalidateMonitor.io.output -// val iBusArbiter = BmbArbiter( -// p = iBusArbiterParameter, -// portCount = cpus.size, -// pendingRspMax = 64, -// lowerFirstPriority = false, -// inputsWithInv = cpus.map(_ => true), -// inputsWithSync = cpus.map(_ => true), -// pendingInvMax = 16 -// ) -// -// (iBusArbiter.io.inputs, cpus).zipped.foreach(_ << _.iBus) -// io.iMem << iBusArbiter.io.output (io.iMems, cpus).zipped.foreach(_ << _.iBus) + + val debug = debugClockDomain on new Area{ + val arbiter = BmbDecoder( + p = io.debugBus.p, + mappings = 
List.tabulate(p.cpuConfigs.size)(i => SizeMapping(0x00000 + i*0x1000, 0x1000)), + capabilities = List.fill(p.cpuConfigs.size)(io.debugBus.p), + pendingMax = 2 + ) + arbiter.io.input << io.debugBus + (arbiter.io.outputs, cpus.map(_.debug)).zipped.foreach(_ >> _) + } } @@ -417,10 +420,7 @@ object VexRiscvSmpClusterTestInfrastructure{ import spinal.core.sim._ dut.clockDomain.forkStimulus(10) dut.debugClockDomain.forkStimulus(10) -// JtagTcp(dut.io.jtag, 100) - dut.io.jtag.tck #= false - dut.io.jtag.tdi #= false - dut.io.jtag.tms #= false + dut.io.debugBus.cmd.valid #= false } } diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala index 9466cfe..43ae242 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala @@ -3,13 +3,15 @@ package vexriscv.demo.smp import spinal.core._ import spinal.lib.bus.bmb._ import spinal.lib.bus.wishbone.{Wishbone, WishboneConfig, WishboneSlaveFactory} -import spinal.lib.com.jtag.Jtag +import spinal.lib.com.jtag.{Jtag, JtagTapInstructionCtrl} import spinal.lib._ import spinal.lib.bus.bmb.sim.{BmbMemoryMultiPort, BmbMemoryTester} import spinal.lib.bus.misc.{AddressMapping, DefaultMapping, SizeMapping} import spinal.lib.eda.bench.Bench import spinal.lib.misc.Clint +import spinal.lib.misc.plic.{PlicGatewayActiveHigh, PlicMapper, PlicMapping, PlicTarget} import spinal.lib.sim.{SimData, SparseMemory, StreamDriver, StreamMonitor, StreamReadyRandomizer} +import spinal.lib.system.debugger.{JtagBridgeNoTap, SystemDebugger, SystemDebuggerConfig} import vexriscv.demo.smp.VexRiscvLitexSmpClusterOpenSbi.{cpuCount, parameter} import vexriscv.demo.smp.VexRiscvSmpClusterGen.vexRiscvConfig import vexriscv.{VexRiscv, VexRiscvConfig} @@ -25,7 +27,8 @@ case class VexRiscvLitexSmpClusterParameter( cluster : VexRiscvSmpClusterParamet //addAttribute("""mark_debug = "true"""") case class VexRiscvLitexSmpCluster(p : VexRiscvLitexSmpClusterParameter, - debugClockDomain : ClockDomain) extends Component{ + debugClockDomain : ClockDomain, + jtagClockDomain : ClockDomain) extends Component{ val peripheralWishboneConfig = WishboneConfig( addressWidth = 30, @@ -41,9 +44,9 @@ case class VexRiscvLitexSmpCluster(p : VexRiscvLitexSmpClusterParameter, val iMem = master(LiteDramNative(p.liteDram)) val peripheral = master(Wishbone(peripheralWishboneConfig)) val clint = slave(Wishbone(Clint.getWisboneConfig())) - val externalInterrupts = in Bits(p.cluster.cpuConfigs.size bits) - val externalSupervisorInterrupts = in Bits(p.cluster.cpuConfigs.size bits) - val jtag = slave(Jtag()) + val plic = slave(Wishbone(WishboneConfig(addressWidth = 20, dataWidth = 32))) + val interrupts = in Bits(32 bits) + val jtagInstruction = slave(JtagTapInstructionCtrl()) val debugReset = out Bool() } val cpuCount = p.cluster.cpuConfigs.size @@ -51,14 +54,25 @@ case class VexRiscvLitexSmpCluster(p : VexRiscvLitexSmpClusterParameter, clint.driveFrom(WishboneSlaveFactory(io.clint)) val cluster = VexRiscvSmpCluster(p.cluster, debugClockDomain) - cluster.io.externalInterrupts <> io.externalInterrupts - cluster.io.externalSupervisorInterrupts <> io.externalSupervisorInterrupts - cluster.io.jtag <> io.jtag cluster.io.debugReset <> io.debugReset cluster.io.timerInterrupts <> B(clint.harts.map(_.timerInterrupt)) cluster.io.softwareInterrupts <> B(clint.harts.map(_.softwareInterrupt)) cluster.io.time := clint.time + val debug = debugClockDomain on new Area{ + val 
jtagConfig = SystemDebuggerConfig() + val jtagBridge = new JtagBridgeNoTap( + c = jtagConfig, + jtagClockDomain = jtagClockDomain + ) + jtagBridge.io.ctrl << io.jtagInstruction + + val debugger = new SystemDebugger(jtagConfig) + debugger.io.remote <> jtagBridge.io.remote + + cluster.io.debugBus << debugger.io.mem.toBmb() + } + val dBusDecoder = BmbDecoderOutOfOrder( p = cluster.io.dMem.p, mappings = Seq(DefaultMapping, p.liteDramMapping), @@ -114,9 +128,39 @@ case class VexRiscvLitexSmpCluster(p : VexRiscvLitexSmpClusterParameter, val peripheralWishbone = peripheralArbiter.io.output.pipelined(cmdValid = true).toWishbone() io.peripheral << peripheralWishbone + + val plic = new Area{ + val priorityWidth = 2 + + val gateways = for(i <- 1 until 32) yield PlicGatewayActiveHigh( + source = io.interrupts(i), + id = i, + priorityWidth = priorityWidth + ) + + val bus = WishboneSlaveFactory(io.plic) + + val targets = for(i <- 0 until cpuCount) yield new Area{ + val machine = PlicTarget( + gateways = gateways, + priorityWidth = priorityWidth + ) + val supervisor = PlicTarget( + gateways = gateways, + priorityWidth = priorityWidth + ) + + cluster.io.externalInterrupts(i) := machine.iep + cluster.io.externalSupervisorInterrupts(i) := supervisor.iep + } + + val bridge = PlicMapper(bus, PlicMapping.sifive)( + gateways = gateways, + targets = targets.flatMap(t => List(t.machine, t.supervisor)) + ) + } } -//ifconfig eth0 192.168.0.50 netmask 255.255.255.0 up object VexRiscvLitexSmpClusterGen extends App { for(cpuCount <- List(1,2,4,8)) { def parameter = VexRiscvLitexSmpClusterParameter( @@ -136,7 +180,8 @@ object VexRiscvLitexSmpClusterGen extends App { def dutGen = { val toplevel = VexRiscvLitexSmpCluster( p = parameter, - debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")) + debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")), + jtagClockDomain = ClockDomain.external("jtag", withReset = false) ) toplevel } @@ -175,7 +220,8 @@ object VexRiscvLitexSmpClusterOpenSbi extends App{ def dutGen = { val top = VexRiscvLitexSmpCluster( p = parameter, - debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")) + debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")), + jtagClockDomain = ClockDomain.external("jtag", withReset = false) ) top.rework{ top.io.clint.setAsDirectionLess.allowDirectionLessIo @@ -216,8 +262,7 @@ object VexRiscvLitexSmpClusterOpenSbi extends App{ dut.io.iMem.simSlave(ram, dut.clockDomain) dut.io.dMem.simSlave(ram, dut.clockDomain, dut.dMemBridge.unburstified) - dut.io.externalInterrupts #= 0 - dut.io.externalSupervisorInterrupts #= 0 + dut.io.interrupts #= 0 dut.clockDomain.onFallingEdges{ if(dut.io.peripheral.CYC.toBoolean){ diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexMpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexMpCluster.scala index 0f2af9b..3631cfc 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexMpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexMpCluster.scala @@ -3,13 +3,19 @@ package vexriscv.demo.smp import spinal.core._ import spinal.lib.bus.bmb._ import spinal.lib.bus.wishbone.{Wishbone, WishboneConfig, WishboneSlaveFactory} -import spinal.lib.com.jtag.Jtag +import spinal.lib.com.jtag.{Jtag, JtagTap, JtagTapInstructionCtrl} import spinal.lib._ +import spinal.lib.blackbox.xilinx.s7.BSCANE2 import spinal.lib.bus.bmb.sim.{BmbMemoryMultiPort, BmbMemoryTester} import 
spinal.lib.bus.misc.{AddressMapping, DefaultMapping, SizeMapping} +import spinal.lib.com.jtag.sim.JtagTcp +import spinal.lib.com.jtag.xilinx.Bscane2BmbMaster import spinal.lib.eda.bench.Bench import spinal.lib.misc.Clint +import spinal.lib.misc.plic.{PlicGatewayActiveHigh, PlicMapper, PlicMapping, PlicTarget} import spinal.lib.sim.{SimData, SparseMemory, StreamDriver, StreamMonitor, StreamReadyRandomizer} +import spinal.lib.system.debugger.{JtagBridgeNoTap, SystemDebugger, SystemDebuggerConfig} +import sun.jvm.hotspot.oops.DataLayout import vexriscv.demo.smp.VexRiscvLitexSmpMpClusterOpenSbi.{cpuCount, parameter} import vexriscv.demo.smp.VexRiscvSmpClusterGen.vexRiscvConfig import vexriscv.{VexRiscv, VexRiscvConfig} @@ -24,8 +30,9 @@ case class VexRiscvLitexSmpMpClusterParameter( cluster : VexRiscvSmpClusterParam liteDramMapping : AddressMapping) //addAttribute("""mark_debug = "true"""") -case class VexRiscvLitexSmpMpCluster(p : VexRiscvLitexSmpMpClusterParameter, - debugClockDomain : ClockDomain) extends Component{ +class VexRiscvLitexSmpMpCluster(val p : VexRiscvLitexSmpMpClusterParameter, + val debugClockDomain : ClockDomain, + val jtagClockDomain : ClockDomain) extends Component{ val peripheralWishboneConfig = WishboneConfig( addressWidth = 30, @@ -43,23 +50,48 @@ case class VexRiscvLitexSmpMpCluster(p : VexRiscvLitexSmpMpClusterParameter, val iMem = Vec(master(LiteDramNative(p.liteDram)), cpuCount) val peripheral = master(Wishbone(peripheralWishboneConfig)) val clint = slave(Wishbone(Clint.getWisboneConfig())) - val externalInterrupts = in Bits(p.cluster.cpuConfigs.size bits) - val externalSupervisorInterrupts = in Bits(p.cluster.cpuConfigs.size bits) - val jtag = slave(Jtag()) + val plic = slave(Wishbone(WishboneConfig(addressWidth = 20, dataWidth = 32))) + val interrupts = in Bits(32 bits) + val jtagInstruction = slave(JtagTapInstructionCtrl()) val debugReset = out Bool() } val clint = Clint(cpuCount) clint.driveFrom(WishboneSlaveFactory(io.clint)) val cluster = VexRiscvSmpCluster(p.cluster, debugClockDomain) - cluster.io.externalInterrupts <> io.externalInterrupts - cluster.io.externalSupervisorInterrupts <> io.externalSupervisorInterrupts - cluster.io.jtag <> io.jtag cluster.io.debugReset <> io.debugReset cluster.io.timerInterrupts <> B(clint.harts.map(_.timerInterrupt)) cluster.io.softwareInterrupts <> B(clint.harts.map(_.softwareInterrupt)) cluster.io.time := clint.time + val debug = debugClockDomain on new Area{ + val jtagConfig = SystemDebuggerConfig() + + val jtagBridge = new JtagBridgeNoTap(jtagConfig, jtagClockDomain) + jtagBridge.io.ctrl << io.jtagInstruction + + val debugger = new SystemDebugger(jtagConfig) + debugger.io.remote <> jtagBridge.io.remote + + cluster.io.debugBus << debugger.io.mem.toBmb() + +// io.jtagInstruction.allowDirectionLessIo.setAsDirectionLess +// val bridge = Bscane2BmbMaster(1) +// cluster.io.debugBus << bridge.io.bmb + + +// val bscane2 = BSCANE2(usedId) +// val jtagClockDomain = ClockDomain(bscane2.TCK) +// +// val jtagBridge = new JtagBridgeNoTap(jtagConfig, jtagClockDomain) +// jtagBridge.io.ctrl << bscane2.toJtagTapInstructionCtrl() +// +// val debugger = new SystemDebugger(jtagConfig) +// debugger.io.remote <> jtagBridge.io.remote +// +// io.bmb << debugger.io.mem.toBmb() + } + val dBusDecoder = BmbDecoderOutOfOrder( p = cluster.io.dMem.p, mappings = Seq(DefaultMapping, p.liteDramMapping), @@ -116,6 +148,38 @@ case class VexRiscvLitexSmpMpCluster(p : VexRiscvLitexSmpMpClusterParameter, val dMemBridge = for(id <- 0 until cpuCount) yield { 
io.dMem(id).fromBmb(dBusDemux.io.outputs(id), wdataFifoSize = 32, rdataFifoSize = 32) } + + + val plic = new Area{ + val priorityWidth = 2 + + val gateways = for(i <- 1 until 32) yield PlicGatewayActiveHigh( + source = io.interrupts(i), + id = i, + priorityWidth = priorityWidth + ) + + val bus = WishboneSlaveFactory(io.plic) + + val targets = for(i <- 0 until cpuCount) yield new Area{ + val machine = PlicTarget( + gateways = gateways, + priorityWidth = priorityWidth + ) + val supervisor = PlicTarget( + gateways = gateways, + priorityWidth = priorityWidth + ) + + cluster.io.externalInterrupts(i) := machine.iep + cluster.io.externalSupervisorInterrupts(i) := supervisor.iep + } + + val bridge = PlicMapper(bus, PlicMapping.sifive)( + gateways = gateways, + targets = targets.flatMap(t => List(t.machine, t.supervisor)) + ) + } // // io.dMem.foreach(_.cmd.valid.addAttribute("""mark_debug = "true"""")) // io.dMem.foreach(_.cmd.ready.addAttribute("""mark_debug = "true"""")) @@ -145,9 +209,10 @@ object VexRiscvLitexSmpMpClusterGen extends App { ) def dutGen = { - val toplevel = VexRiscvLitexSmpMpCluster( + val toplevel = new VexRiscvLitexSmpMpCluster( p = parameter, - debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")) + debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")), + jtagClockDomain = ClockDomain.external("jtag", withReset = false) ) toplevel } @@ -185,10 +250,20 @@ object VexRiscvLitexSmpMpClusterOpenSbi extends App{ ) def dutGen = { - val top = VexRiscvLitexSmpMpCluster( + val top = new VexRiscvLitexSmpMpCluster( p = parameter, - debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")) - ) + debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")), + jtagClockDomain = ClockDomain.external("jtag", withReset = false) + ){ + io.jtagInstruction.allowDirectionLessIo.setAsDirectionLess + val jtag = slave(Jtag()) + jtagClockDomain.readClockWire.setAsDirectionLess() := jtag.tck + val jtagLogic = jtagClockDomain on new Area{ + val tap = new JtagTap(jtag, 4) + val idcodeArea = tap.idcode(B"x10001FFF")(1) + val wrapper = tap.map(io.jtagInstruction, instructionId = 2) + } + } top.rework{ top.io.clint.setAsDirectionLess.allowDirectionLessIo top.io.peripheral.setAsDirectionLess.allowDirectionLessIo.simPublic() @@ -217,6 +292,7 @@ object VexRiscvLitexSmpMpClusterOpenSbi extends App{ dut.debugClockDomain.resetSim #= false } + JtagTcp(dut.jtag, 10*20) val ram = SparseMemory() ram.loadBin(0x80000000l, "../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_jump.bin") @@ -229,8 +305,8 @@ object VexRiscvLitexSmpMpClusterOpenSbi extends App{ dut.io.dMem(id).simSlave(ram, dut.clockDomain) } - dut.io.externalInterrupts #= 0 - dut.io.externalSupervisorInterrupts #= 0 + dut.io.interrupts #= 0 + // val stdin = mutable.Queue[Byte]() // def stdInPush(str : String) = stdin ++= str.toCharArray.map(_.toByte) @@ -272,7 +348,7 @@ object VexRiscvLitexSmpMpClusterOpenSbi extends App{ fork{ val at = 0 - val duration = 0 + val duration = 1000 while(simTime() < at*1000000l) { disableSimWave() sleep(100000 * 10) From 490c1f6b0276f9b09a4ea34bfe6ce47efb7938d1 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Fri, 19 Jun 2020 15:56:45 +0200 Subject: [PATCH 88/91] cleanup of old todo --- src/main/scala/vexriscv/TestsWorkspace.scala | 7 ------- src/main/scala/vexriscv/demo/smp/Misc.scala | 2 +- src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala | 3 --- 3 files changed, 1 insertion(+), 11 deletions(-) diff 
--git a/src/main/scala/vexriscv/TestsWorkspace.scala b/src/main/scala/vexriscv/TestsWorkspace.scala index af1c77b..b522aed 100644 --- a/src/main/scala/vexriscv/TestsWorkspace.scala +++ b/src/main/scala/vexriscv/TestsWorkspace.scala @@ -274,10 +274,3 @@ object TestsWorkspace { } } } - -//TODO DivPlugin should not used MixedDivider (double twoComplement) -//TODO DivPlugin should register the twoComplement output before pipeline insertion -//TODO MulPlugin doesn't fit well on Artix (FMAX) -//TODO PcReg design is unoptimized by Artix synthesis -//TODO FMAX SRC mux + bipass mux prioriti -//TODO FMAX, isFiring is to pesimisstinc in some cases(include removeIt flushed ..) \ No newline at end of file diff --git a/src/main/scala/vexriscv/demo/smp/Misc.scala b/src/main/scala/vexriscv/demo/smp/Misc.scala index b192a9c..9980cf4 100644 --- a/src/main/scala/vexriscv/demo/smp/Misc.scala +++ b/src/main/scala/vexriscv/demo/smp/Misc.scala @@ -188,7 +188,7 @@ case class BmbToLiteDram(bmbParameter : BmbParameter, wData.arbitrationFrom(dataFork.throwWhen(dataFork.isRead)) wData.data := dataFork.data wData.we := dataFork.mask - io.output.wdata << wData.queueLowLatency(wdataFifoSize, latency = 1) //TODO queue low latency + io.output.wdata << wData.queueLowLatency(wdataFifoSize, latency = 1) } else { dataFork.ready := True io.output.wdata.valid := False diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala index ccc1f57..efd4010 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -454,12 +454,9 @@ object VexRiscvSmpClusterTest extends App{ // top -b -n 1 // TODO -// litex cluster should use out of order decoder // MultiChannelFifo.toStream arbitration // BmbDecoderOutOfOrder arbitration // DataCache to bmb invalidation that are more than single line -// update fence w to w -// DBusCachedPlugin dBusAccess execute.isValid := True is induce a longe combinatorial path to check conditions, D$ execute valid => execute haltIt object VexRiscvSmpClusterOpenSbi extends App{ import spinal.core.sim._ From c18bc12cb20b1ede650f6eb68628fb8c0a42dec2 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Fri, 19 Jun 2020 15:57:21 +0200 Subject: [PATCH 89/91] Fix DebugPlugin.fromBmb --- src/main/scala/vexriscv/plugin/DebugPlugin.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/scala/vexriscv/plugin/DebugPlugin.scala b/src/main/scala/vexriscv/plugin/DebugPlugin.scala index 9f2a243..4797e21 100644 --- a/src/main/scala/vexriscv/plugin/DebugPlugin.scala +++ b/src/main/scala/vexriscv/plugin/DebugPlugin.scala @@ -106,6 +106,7 @@ case class DebugExtensionBus() extends Bundle with IMasterSlave{ bus.rsp.valid := RegNext(cmd.fire) init(False) bus.rsp.data := rsp.data bus.rsp.last := True + bus.rsp.setSuccess() bus } From f0f2cf61daca13629368a6217a3a35800e546741 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Fri, 19 Jun 2020 15:57:56 +0200 Subject: [PATCH 90/91] D$ inv/ack are now fragment, which ease serialisation of wider invalidations --- src/main/scala/vexriscv/ip/DataCache.scala | 38 ++++++++++++---------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/src/main/scala/vexriscv/ip/DataCache.scala b/src/main/scala/vexriscv/ip/DataCache.scala index 65f2918..98ba0ab 100644 --- a/src/main/scala/vexriscv/ip/DataCache.scala +++ b/src/main/scala/vexriscv/ip/DataCache.scala @@ -228,8 +228,8 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with 
IMasterSlave val cmd = Stream (DataCacheMemCmd(p)) val rsp = Flow (DataCacheMemRsp(p)) - val inv = p.withInvalidate generate Stream(DataCacheInv(p)) - val ack = p.withInvalidate generate Stream(DataCacheAck(p)) + val inv = p.withInvalidate generate Stream(Fragment(DataCacheInv(p))) + val ack = p.withInvalidate generate Stream(Fragment(DataCacheAck(p))) val sync = p.withInvalidate generate Stream(DataCacheSync(p)) override def asMaster(): Unit = { @@ -279,15 +279,7 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave axi.r.ready := True axi.b.ready := True - - //TODO remove - val axi2 = cloneOf(axi) - // axi.arw >/-> axi2.arw - // axi.w >/-> axi2.w - // axi.r <-/< axi2.r - // axi.b <-/< axi2.b - axi2 << axi - axi2 + axi } @@ -509,13 +501,24 @@ case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave if(p.withExclusive) rsp.exclusive := bus.rsp.exclusive bus.rsp.ready := True - if(p.withInvalidate){ - inv.arbitrationFrom(bus.inv) - inv.address := bus.inv.address - inv.enable := bus.inv.all + val invalidateLogic = p.withInvalidate generate new Area{ + val beatCountMinusOne = bus.inv.transferBeatCountMinusOne(p.bytePerLine) + val counter = Reg(UInt(widthOf(beatCountMinusOne) bits)) init(0) - bus.ack.arbitrationFrom(ack) - // //TODO manage lenght ? + inv.valid := bus.inv.valid + inv.address := bus.inv.address + (counter << log2Up(p.bytePerLine)) + inv.enable := bus.inv.all + inv.last := counter === beatCountMinusOne + bus.inv.ready := inv.last && inv.ready + + if(widthOf(counter) != 0) when(inv.fire){ + counter := counter + 1 + when(inv.last){ + counter := 0 + } + } + + bus.ack.arbitrationFrom(ack.throwWhen(!ack.last)) } }.bus @@ -1112,6 +1115,7 @@ class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParam } io.mem.ack.arbitrationFrom(input) io.mem.ack.hit := wayHit + io.mem.ack.last := input.last //Manage invalidation read during write hazard s1.invalidations := RegNextWhen((input.valid && input.enable && input.address(lineRange) === s0.input.address(lineRange)) ? wayHits | 0, s0.input.ready) From c12f9a378d2174d0cbca8c76dee6a325a0174e3d Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Sat, 20 Jun 2020 13:18:46 +0200 Subject: [PATCH 91/91] Fix inv regression --- src/test/cpp/regression/main.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/test/cpp/regression/main.cpp b/src/test/cpp/regression/main.cpp index 717e534..5468545 100644 --- a/src/test/cpp/regression/main.cpp +++ b/src/test/cpp/regression/main.cpp @@ -2496,12 +2496,12 @@ public: if(top->dBus_inv_ready) top->dBus_inv_valid = 0; if(top->dBus_inv_valid == 0 && VL_RANDOM_I(7) < 5){ top->dBus_inv_valid = 1; - top->dBus_inv_payload_enable = VL_RANDOM_I(7) < 100; + top->dBus_inv_payload_fragment_enable = VL_RANDOM_I(7) < 100; if(!invalidationHint.empty()){ - top->dBus_inv_payload_address = invalidationHint.front(); + top->dBus_inv_payload_fragment_address = invalidationHint.front(); invalidationHint.pop(); } else { - top->dBus_inv_payload_address = VL_RANDOM_I(32); + top->dBus_inv_payload_fragment_address = VL_RANDOM_I(32); } } }
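Editorial note on patches 90/91: turning the d$ inv/ack channels into Stream(Fragment(...)) is what renames the Verilator nets to dBus_inv_payload_fragment_* in the regression fix above, and it is what lets one wide (multi-beat) invalidation be replayed as several single-beat fragments. The stand-alone sketch below only illustrates that beat-serialisation pattern; the Inv bundle and the InvSerializer component are invented stand-ins for this note, not VexRiscv types, and the real bridge additionally derives the beat count with transferBeatCountMinusOne.

import spinal.core._
import spinal.lib._

// Stand-in payload: mirrors the address/enable fields carried by a cache invalidation.
case class Inv(addressWidth : Int) extends Bundle {
  val address = UInt(addressWidth bits)
  val enable  = Bool()
}

// Serialise one line-wide invalidation into bytePerLine/bytePerBeat single-beat fragments.
case class InvSerializer(addressWidth : Int, bytePerLine : Int, bytePerBeat : Int) extends Component {
  val beatCount = bytePerLine / bytePerBeat
  require(beatCount > 1) // the sketch skips the single-beat degenerate case handled in the patch

  val io = new Bundle {
    val lineInv = slave(Stream(Inv(addressWidth)))            // one transaction per cache line
    val beatInv = master(Stream(Fragment(Inv(addressWidth)))) // one fragment per beat
  }

  val counter = Reg(UInt(log2Up(beatCount) bits)) init(0)

  io.beatInv.valid   := io.lineInv.valid
  io.beatInv.address := io.lineInv.address + (counter << log2Up(bytePerBeat))
  io.beatInv.enable  := io.lineInv.enable
  io.beatInv.last    := counter === beatCount - 1
  io.lineInv.ready   := io.beatInv.last && io.beatInv.ready  // pop the source on the final beat

  when(io.beatInv.fire) {
    counter := counter + 1
    when(io.beatInv.last) { counter := 0 }
  }
}

On the acknowledge side the patch mirrors this by only forwarding the beat whose last bit is set (bus.ack.arbitrationFrom(ack.throwWhen(!ack.last))), so a multi-fragment invalidation still produces a single upstream ack.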