diff --git a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala index b490654..0371b29 100644 --- a/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala +++ b/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -292,6 +292,7 @@ object VexRiscvSmpClusterTestInfrastructure{ val REPORT_END = 0x08 val REPORT_BARRIER_START = 0x0C val REPORT_BARRIER_END = 0x10 + val REPORT_CONSISTENCY_VALUES = 0x14 val PUTC = 0x00 val GETC = 0x04 @@ -310,14 +311,7 @@ object VexRiscvSmpClusterTestInfrastructure{ } } val reports = ArrayBuffer.fill(cpuCount)(ArrayBuffer[Report]()) - onSimEnd{ - for((list, hart) <- reports.zipWithIndex){ - println(f"\n\n**** CPU $hart%2d ****") - for((report, reportId) <- list.zipWithIndex){ - println(f" $reportId%3d : ${report.code}%3x -> ${report.data}%3d") - } - } - } + val writeTable = mutable.HashMap[Int, Int => Unit]() val readTable = mutable.HashMap[Int, () => Int]() @@ -329,6 +323,24 @@ object VexRiscvSmpClusterTestInfrastructure{ var reportWatchdog = 0 val cpuEnd = Array.fill(cpuCount)(false) val barriers = mutable.HashMap[Int, Int]() + var consistancyCounter = 0 + var consistancyLast = 0 + var consistancyA = 0 + var consistancyB = 0 + var consistancyAB = 0 + var consistancyNone = 0 + + onSimEnd{ + for((list, hart) <- reports.zipWithIndex){ + println(f"\n\n**** CPU $hart%2d ****") + for((report, reportId) <- list.zipWithIndex){ + println(f" $reportId%3d : ${report.code}%3x -> ${report.data}%3d") + } + } + + println(s"consistancy NONE:$consistancyNone A:$consistancyA B:$consistancyB AB:$consistancyAB") + } + override def setByte(address: Long, value: Byte): Unit = { if((address & 0xF0000000l) != 0xF0000000l) return super.setByte(address, value) val byteId = address & 3 @@ -344,7 +356,7 @@ object VexRiscvSmpClusterTestInfrastructure{ code = (offset & 0x00FFFF).toInt, data = writeData ) - println(report) +// println(report) reports(report.hart) += report reportWatchdog += 1 import report._ @@ -361,6 +373,21 @@ object VexRiscvSmpClusterTestInfrastructure{ val counter = barriers.getOrElse(data, 0) assert(counter == cpuCount) } + case REPORT_CONSISTENCY_VALUES => consistancyCounter match { + case 0 => { + consistancyCounter = 1 + consistancyLast = data + } + case 1 => { + consistancyCounter = 0 + (data, consistancyLast) match { + case (666, 0) => consistancyA += 1 + case (0, 666) => consistancyB += 1 + case (666, 666) => consistancyAB += 1 + case (0,0) => consistancyNone += 1; simFailure("Consistancy issue :(") + } + } + } } } case _ => writeTable.get(offset.toInt) match { @@ -440,7 +467,7 @@ object VexRiscvSmpClusterTest extends App{ import spinal.core.sim._ val simConfig = SimConfig -// simConfig.withWave + simConfig.withWave simConfig.allOptimisation simConfig.addSimulatorFlag("--threads 1") @@ -448,7 +475,9 @@ object VexRiscvSmpClusterTest extends App{ val withStall = true simConfig.compile(VexRiscvSmpClusterGen.vexRiscvCluster(cpuCount)).doSimUntilVoid(seed = 42){dut => - SimTimeout(10000*10*cpuCount) + disableSimWave() + SimTimeout(100000000l*10*cpuCount) + dut.clockDomain.forkSimSpeedPrinter(1.0) VexRiscvSmpClusterTestInfrastructure.init(dut) val ram = VexRiscvSmpClusterTestInfrastructure.ram(dut) ram.memory.loadBin(0x80000000l, "src/test/cpp/raw/smp/build/smp.bin") diff --git a/src/main/scala/vexriscv/ip/DataCache.scala b/src/main/scala/vexriscv/ip/DataCache.scala index d685a34..3464165 100644 --- a/src/main/scala/vexriscv/ip/DataCache.scala +++ b/src/main/scala/vexriscv/ip/DataCache.scala @@ -102,10 +102,10 @@ case class DataCacheCpuExecute(p : DataCacheConfig) extends Bundle with IMasterS val address = UInt(p.addressWidth bit) val haltIt = Bool val args = DataCacheCpuExecuteArgs(p) - val fence = Bool() + val totalyConsistent = Bool() override def asMaster(): Unit = { - out(isValid, args, address, fence) + out(isValid, args, address, totalyConsistent) in(haltIt) } } @@ -129,9 +129,10 @@ case class DataCacheCpuMemory(p : DataCacheConfig) extends Bundle with IMasterSl val isWrite = Bool val address = UInt(p.addressWidth bit) val mmuBus = MemoryTranslatorBus() + val fenceValid = Bool() override def asMaster(): Unit = { - out(isValid, isStuck, isRemoved, address) + out(isValid, isStuck, isRemoved, address, fenceValid) in(isWrite) slave(mmuBus) } @@ -148,11 +149,13 @@ case class DataCacheCpuWriteBack(p : DataCacheConfig) extends Bundle with IMaste val address = UInt(p.addressWidth bit) val mmuException, unalignedAccess, accessError = Bool() val keepMemRspData = Bool() //Used by external AMO to avoid having an internal buffer + val fenceValid = Bool() + val fenceFire = Bool() // val exceptionBus = if(p.catchSomething) Flow(ExceptionCause()) else null override def asMaster(): Unit = { - out(isValid,isStuck,isUser, address) + out(isValid,isStuck,isUser, address, fenceValid, fenceFire) in(haltIt, data, mmuException, unalignedAccess, accessError, isWrite, keepMemRspData) } } @@ -514,7 +517,7 @@ class DataCache(val p : DataCacheConfig) extends Component{ val counter = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0) counter := counter + U(io.mem.cmd.fire && io.mem.cmd.last) - U(io.mem.rsp.valid && io.mem.rsp.last) - val consistent = counter === 0 + val done = counter === 0 val full = RegNext(counter.msb) val last = counter === 1 @@ -529,16 +532,26 @@ class DataCache(val p : DataCacheConfig) extends Component{ val sync = withInvalidate generate new Area{ io.mem.sync.ready := True - val counter = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0) - counter := counter + U(io.mem.cmd.fire && io.mem.cmd.wr) - U(io.mem.sync.fire) + val pendingSync = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0) + val pendingSyncNext = pendingSync + U(io.mem.cmd.fire && io.mem.cmd.wr) - U(io.mem.sync.fire) + pendingSync := pendingSyncNext - val full = RegNext(counter.msb) + val full = RegNext(pendingSync.msb) io.cpu.execute.haltIt setWhen(full) - val consistent = counter === 0 + + val incoerentSync = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0) + incoerentSync := incoerentSync - U(io.mem.sync.fire && incoerentSync =/= 0) + when(io.cpu.writeBack.fenceValid){ incoerentSync := pendingSyncNext } + + + val totalyConsistent = pendingSync === 0 + val fenceConsistent = incoerentSync === 0 } + + val stage0 = new Area{ val mask = io.cpu.execute.size.mux ( U(0) -> B"0001", @@ -548,10 +561,14 @@ class DataCache(val p : DataCacheConfig) extends Component{ val dataColisions = collisionProcess(io.cpu.execute.address(lineRange.high downto wordRange.low), mask) val wayInvalidate = B(0, wayCount bits) //Used if invalidate enabled - when(io.cpu.execute.fence){ - val consistent = if(withInvalidate) sync.consistent else if(withWriteResponse) pending.consistent else null - if(consistent != null){ - when(!consistent || io.cpu.memory.isValid && io.cpu.memory.isWrite || io.cpu.writeBack.isValid && io.cpu.memory.isWrite){ + val isAmo = if(withAmo) io.cpu.execute.isAmo else False + + //Ensure write to read consistency + val consistancyCheck = (withInvalidate || withWriteResponse) generate new Area { + val fenceConsistent = (if(withInvalidate) sync.fenceConsistent else pending.done) && !io.cpu.writeBack.fenceValid && !io.cpu.memory.fenceValid //Pessimistic fence tracking + val totalyConsistent = (if(withInvalidate) sync.totalyConsistent else pending.done) && !(io.cpu.memory.isValid && io.cpu.memory.isWrite) && !(io.cpu.writeBack.isValid && io.cpu.memory.isWrite) + when(io.cpu.execute.isValid && (!io.cpu.execute.args.wr || isAmo)){ + when(!fenceConsistent || io.cpu.execute.totalyConsistent && !totalyConsistent){ io.cpu.execute.haltIt := True } } @@ -632,7 +649,6 @@ class DataCache(val p : DataCacheConfig) extends Component{ } } - val lrSc = withInternalLrSc generate new Area{ val reserved = RegInit(False) when(io.cpu.writeBack.isValid && !io.cpu.writeBack.isStuck && request.isLrsc @@ -923,7 +939,7 @@ class DataCache(val p : DataCacheConfig) extends Component{ io.mem.ack.hit := wayHit //Manage invalidation read during write hazard - s1.invalidations := RegNext((input.valid && input.enable) ? wayHits | 0) + s1.invalidations := RegNextWhen((input.valid && input.enable && input.address(lineRange) === s0.input.address(lineRange)) ? wayHits | 0, s0.input.ready) } } } \ No newline at end of file diff --git a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala index 769ed07..82c7953 100644 --- a/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala +++ b/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala @@ -50,7 +50,8 @@ class DBusCachedPlugin(val config : DataCacheConfig, object MEMORY_LRSC extends Stageable(Bool) object MEMORY_AMO extends Stageable(Bool) object MEMORY_FENCE extends Stageable(Bool) - object MEMORY_FENCE_DECODED extends Stageable(Bool) + object MEMORY_FENCE_FRONT extends Stageable(Bool) + object MEMORY_FENCE_BACK extends Stageable(Bool) object IS_DBUS_SHARING extends Stageable(Bool()) object MEMORY_VIRTUAL_ADDRESS extends Stageable(UInt(32 bits)) @@ -224,17 +225,22 @@ class DBusCachedPlugin(val config : DataCacheConfig, def PS = PW || PO } + //Manage write to read hit ordering (ensure invalidation timings) val fence = new Area{ - val hazard = False + insert(MEMORY_FENCE_FRONT) := False + insert(MEMORY_FENCE_BACK) := False val ff = input(INSTRUCTION)(31 downto 20).as(FenceFlags()) if(withWriteResponse){ - hazard setWhen(input(MEMORY_FENCE) && (ff.PS && ff.SL)) //Manage write to read hit ordering (ensure invalidation timings) - when(input(INSTRUCTION)(26 downto 25) =/= 0){ - if(withLrSc) hazard setWhen(input(MEMORY_LRSC)) - if(withAmo) hazard setWhen(input(MEMORY_AMO)) + insert(MEMORY_FENCE_BACK) setWhen(input(MEMORY_FENCE) && (ff.PS && ff.SL)) + when(input(INSTRUCTION)(26)) { //AQ + if(withLrSc) insert(MEMORY_FENCE_BACK) setWhen(input(MEMORY_LRSC)) + if(withAmo) insert(MEMORY_FENCE_BACK) setWhen(input(MEMORY_AMO)) + } + when(input(INSTRUCTION)(25)) { //RL but a bit pessimistic as could be MEMORY_FENCE_BACK when the memory op isn't a read + if(withLrSc) insert(MEMORY_FENCE_FRONT) setWhen(input(MEMORY_LRSC)) + if(withAmo) insert(MEMORY_FENCE_FRONT) setWhen(input(MEMORY_AMO)) } } - insert(MEMORY_FENCE_DECODED) := hazard } } @@ -254,7 +260,7 @@ class DBusCachedPlugin(val config : DataCacheConfig, cache.io.cpu.flush.valid := arbitration.isValid && input(MEMORY_MANAGMENT) - cache.io.cpu.execute.fence := arbitration.isValid && input(MEMORY_FENCE_DECODED) + cache.io.cpu.execute.totalyConsistent := arbitration.isValid && input(MEMORY_FENCE_FRONT) arbitration.haltItself setWhen(cache.io.cpu.flush.isStall || cache.io.cpu.execute.haltIt) if(withLrSc) { @@ -296,6 +302,8 @@ class DBusCachedPlugin(val config : DataCacheConfig, cache.io.cpu.memory.mmuBus <> mmuBus cache.io.cpu.memory.mmuBus.rsp.isIoAccess setWhen(pipeline(DEBUG_BYPASS_CACHE) && !cache.io.cpu.memory.isWrite) + + cache.io.cpu.memory.fenceValid := arbitration.isValid && input(MEMORY_FENCE_BACK) } val managementStage = stages.last @@ -306,6 +314,9 @@ class DBusCachedPlugin(val config : DataCacheConfig, cache.io.cpu.writeBack.isUser := (if(privilegeService != null) privilegeService.isUser() else False) cache.io.cpu.writeBack.address := U(input(REGFILE_WRITE_DATA)) + cache.io.cpu.writeBack.fenceValid := arbitration.isValid && input(MEMORY_FENCE_BACK) + cache.io.cpu.writeBack.fenceFire := arbitration.isFiring && input(MEMORY_FENCE_BACK) + redoBranch.valid := False redoBranch.payload := input(PC) arbitration.flushIt setWhen(redoBranch.valid) diff --git a/src/test/cpp/raw/smp/build/smp.asm b/src/test/cpp/raw/smp/build/smp.asm index 06f2616..19fc727 100644 --- a/src/test/cpp/raw/smp/build/smp.asm +++ b/src/test/cpp/raw/smp/build/smp.asm @@ -10,24 +10,24 @@ Disassembly of section .crt_section: 80000008: f1402373 csrr t1,mhartid 8000000c: 01031313 slli t1,t1,0x10 80000010: 006282b3 add t0,t0,t1 -80000014: 0082a023 sw s0,0(t0) # f8000000 +80000014: 0082a023 sw s0,0(t0) # f8000000 80000018 : 80000018: 00100513 li a0,1 8000001c: 00000597 auipc a1,0x0 -80000020: 1d058593 addi a1,a1,464 # 800001ec +80000020: 36058593 addi a1,a1,864 # 8000037c 80000024: 00a5a02f amoadd.w zero,a0,(a1) 80000028 : 80000028: 00000417 auipc s0,0x0 -8000002c: 1c442403 lw s0,452(s0) # 800001ec +8000002c: 35442403 lw s0,852(s0) # 8000037c 80000030: 19000513 li a0,400 -80000034: 1ac000ef jal ra,800001e0 +80000034: 33c000ef jal ra,80000370 80000038: 00000497 auipc s1,0x0 -8000003c: 1b44a483 lw s1,436(s1) # 800001ec +8000003c: 3444a483 lw s1,836(s1) # 8000037c 80000040: fe8494e3 bne s1,s0,80000028 80000044: f80002b7 lui t0,0xf8000 -80000048: 00428293 addi t0,t0,4 # f8000004 +80000048: 00428293 addi t0,t0,4 # f8000004 8000004c: f1402373 csrr t1,mhartid 80000050: 01031313 slli t1,t1,0x10 80000054: 006282b3 add t0,t0,t1 @@ -35,132 +35,319 @@ Disassembly of section .crt_section: 8000005c : 8000005c: 00100513 li a0,1 -80000060: 040000ef jal ra,800000a0 +80000060: 1d0000ef jal ra,80000230 80000064: 00200513 li a0,2 -80000068: 038000ef jal ra,800000a0 +80000068: 1c8000ef jal ra,80000230 8000006c: 00300513 li a0,3 -80000070: 030000ef jal ra,800000a0 +80000070: 1c0000ef jal ra,80000230 80000074: 00400513 li a0,4 -80000078: 0a4000ef jal ra,8000011c +80000078: 234000ef jal ra,800002ac 8000007c: 00500513 li a0,5 -80000080: 09c000ef jal ra,8000011c +80000080: 22c000ef jal ra,800002ac 80000084: 00600513 li a0,6 -80000088: 094000ef jal ra,8000011c +80000088: 224000ef jal ra,800002ac 8000008c: 00700513 li a0,7 -80000090: 010000ef jal ra,800000a0 +80000090: 1a0000ef jal ra,80000230 80000094: 00800513 li a0,8 -80000098: 084000ef jal ra,8000011c -8000009c: 1000006f j 8000019c +80000098: 214000ef jal ra,800002ac +8000009c: 00000197 auipc gp,0x0 +800000a0: 2ec1a183 lw gp,748(gp) # 80000388 -800000a0 : -800000a0: f80002b7 lui t0,0xf8000 -800000a4: 00c28293 addi t0,t0,12 # f800000c -800000a8: f1402373 csrr t1,mhartid -800000ac: 01031313 slli t1,t1,0x10 -800000b0: 006282b3 add t0,t0,t1 -800000b4: 00a2a023 sw a0,0(t0) -800000b8: 00000e97 auipc t4,0x0 -800000bc: 13ceae83 lw t4,316(t4) # 800001f4 +800000a4 : +800000a4: 00018513 mv a0,gp +800000a8: 00118193 addi gp,gp,1 +800000ac: 200000ef jal ra,800002ac +800000b0: 00000297 auipc t0,0x0 +800000b4: 2e42a283 lw t0,740(t0) # 80000394 +800000b8: 00a00313 li t1,10 +800000bc: 1662d863 bge t0,t1,8000022c 800000c0: 00000297 auipc t0,0x0 -800000c4: 13028293 addi t0,t0,304 # 800001f0 -800000c8: 00100313 li t1,1 -800000cc: 0062a2af amoadd.w t0,t1,(t0) -800000d0: 00128293 addi t0,t0,1 -800000d4: 00000317 auipc t1,0x0 -800000d8: 11832303 lw t1,280(t1) # 800001ec -800000dc: 00629c63 bne t0,t1,800000f4 -800000e0: 001e8293 addi t0,t4,1 -800000e4: 00000317 auipc t1,0x0 -800000e8: 10032623 sw zero,268(t1) # 800001f0 -800000ec: 00000317 auipc t1,0x0 -800000f0: 10532423 sw t0,264(t1) # 800001f4 +800000c4: 2cc2a283 lw t0,716(t0) # 8000038c +800000c8: 00000317 auipc t1,0x0 +800000cc: 2c832303 lw t1,712(t1) # 80000390 +800000d0: 06628a63 beq t0,t1,80000144 +800000d4: f14022f3 csrr t0,mhartid +800000d8: 00000317 auipc t1,0x0 +800000dc: 2b432303 lw t1,692(t1) # 8000038c +800000e0: 00000417 auipc s0,0x0 +800000e4: 32040413 addi s0,s0,800 # 80000400 +800000e8: 00000497 auipc s1,0x0 +800000ec: 31c48493 addi s1,s1,796 # 80000404 +800000f0: 02628863 beq t0,t1,80000120 +800000f4: 00000317 auipc t1,0x0 +800000f8: 29c32303 lw t1,668(t1) # 80000390 +800000fc: 00000417 auipc s0,0x0 +80000100: 30840413 addi s0,s0,776 # 80000404 +80000104: 00000497 auipc s1,0x0 +80000108: 2fc48493 addi s1,s1,764 # 80000400 +8000010c: 00628a63 beq t0,t1,80000120 -800000f4 : -800000f4: 00000297 auipc t0,0x0 -800000f8: 1002a283 lw t0,256(t0) # 800001f4 -800000fc: ffd28ce3 beq t0,t4,800000f4 -80000100: f80002b7 lui t0,0xf8000 -80000104: 01028293 addi t0,t0,16 # f8000010 -80000108: f1402373 csrr t1,mhartid -8000010c: 01031313 slli t1,t1,0x10 -80000110: 006282b3 add t0,t0,t1 -80000114: 00a2a023 sw a0,0(t0) -80000118: 00008067 ret +80000110 : +80000110: 00018513 mv a0,gp +80000114: 00118193 addi gp,gp,1 +80000118: 194000ef jal ra,800002ac +8000011c: 0280006f j 80000144 -8000011c : -8000011c: f80002b7 lui t0,0xf8000 -80000120: 00c28293 addi t0,t0,12 # f800000c -80000124: f1402373 csrr t1,mhartid -80000128: 01031313 slli t1,t1,0x10 -8000012c: 006282b3 add t0,t0,t1 -80000130: 00a2a023 sw a0,0(t0) -80000134: 00000e97 auipc t4,0x0 -80000138: 0c0eae83 lw t4,192(t4) # 800001f4 -8000013c: 00000297 auipc t0,0x0 -80000140: 0b428293 addi t0,t0,180 # 800001f0 +80000120 : +80000120: 29a00913 li s2,666 +80000124: 00018513 mv a0,gp +80000128: 00118193 addi gp,gp,1 +8000012c: 0004a983 lw s3,0(s1) +80000130: 17c000ef jal ra,800002ac +80000134: 01242023 sw s2,0(s0) +80000138: 0120000f fence w,r +8000013c: 0004a983 lw s3,0(s1) +80000140: 05342023 sw s3,64(s0) -80000144 : -80000144: 1002a32f lr.w t1,(t0) -80000148: 00130313 addi t1,t1,1 -8000014c: 1862a3af sc.w t2,t1,(t0) -80000150: fe039ae3 bnez t2,80000144 -80000154: 00000297 auipc t0,0x0 -80000158: 0982a283 lw t0,152(t0) # 800001ec -8000015c: 00629c63 bne t0,t1,80000174 -80000160: 001e8293 addi t0,t4,1 +80000144 : +80000144: 0330000f fence rw,rw +80000148: 00018513 mv a0,gp +8000014c: 00118193 addi gp,gp,1 +80000150: 15c000ef jal ra,800002ac +80000154: f14022f3 csrr t0,mhartid +80000158: f40296e3 bnez t0,800000a4 + +8000015c : +8000015c: 00000297 auipc t0,0x0 +80000160: 2302a283 lw t0,560(t0) # 8000038c 80000164: 00000317 auipc t1,0x0 -80000168: 08032623 sw zero,140(t1) # 800001f0 -8000016c: 00000317 auipc t1,0x0 -80000170: 08532423 sw t0,136(t1) # 800001f4 +80000168: 22c32303 lw t1,556(t1) # 80000390 +8000016c: 04628263 beq t0,t1,800001b0 +80000170: 00000517 auipc a0,0x0 +80000174: 2d452503 lw a0,724(a0) # 80000444 +80000178: f80002b7 lui t0,0xf8000 +8000017c: 01428293 addi t0,t0,20 # f8000014 +80000180: f1402373 csrr t1,mhartid +80000184: 01031313 slli t1,t1,0x10 +80000188: 006282b3 add t0,t0,t1 +8000018c: 00a2a023 sw a0,0(t0) +80000190: 00000517 auipc a0,0x0 +80000194: 2b052503 lw a0,688(a0) # 80000440 +80000198: f80002b7 lui t0,0xf8000 +8000019c: 01428293 addi t0,t0,20 # f8000014 +800001a0: f1402373 csrr t1,mhartid +800001a4: 01031313 slli t1,t1,0x10 +800001a8: 006282b3 add t0,t0,t1 +800001ac: 00a2a023 sw a0,0(t0) -80000174 : -80000174: 00000297 auipc t0,0x0 -80000178: 0802a283 lw t0,128(t0) # 800001f4 -8000017c: ffd28ce3 beq t0,t4,80000174 -80000180: f80002b7 lui t0,0xf8000 -80000184: 01028293 addi t0,t0,16 # f8000010 -80000188: f1402373 csrr t1,mhartid -8000018c: 01031313 slli t1,t1,0x10 -80000190: 006282b3 add t0,t0,t1 -80000194: 00a2a023 sw a0,0(t0) -80000198: 00008067 ret +800001b0 : +800001b0: f14022f3 csrr t0,mhartid +800001b4: ee0298e3 bnez t0,800000a4 +800001b8: 00000297 auipc t0,0x0 +800001bc: 2402a423 sw zero,584(t0) # 80000400 +800001c0: 00000297 auipc t0,0x0 +800001c4: 2402a223 sw zero,580(t0) # 80000404 +800001c8: 00000417 auipc s0,0x0 +800001cc: 1b442403 lw s0,436(s0) # 8000037c +800001d0: 00000297 auipc t0,0x0 +800001d4: 1c02a283 lw t0,448(t0) # 80000390 +800001d8: 00128293 addi t0,t0,1 +800001dc: 00000317 auipc t1,0x0 +800001e0: 1a532a23 sw t0,436(t1) # 80000390 +800001e4: 04829063 bne t0,s0,80000224 +800001e8: 00000317 auipc t1,0x0 +800001ec: 1a032423 sw zero,424(t1) # 80000390 +800001f0: 00000297 auipc t0,0x0 +800001f4: 19c2a283 lw t0,412(t0) # 8000038c +800001f8: 00128293 addi t0,t0,1 +800001fc: 00000317 auipc t1,0x0 +80000200: 18532823 sw t0,400(t1) # 8000038c +80000204: 02829063 bne t0,s0,80000224 +80000208: 00000317 auipc t1,0x0 +8000020c: 18032223 sw zero,388(t1) # 8000038c +80000210: 00000297 auipc t0,0x0 +80000214: 1842a283 lw t0,388(t0) # 80000394 +80000218: 00128293 addi t0,t0,1 +8000021c: 00000317 auipc t1,0x0 +80000220: 16532c23 sw t0,376(t1) # 80000394 -8000019c : -8000019c: 00000413 li s0,0 -800001a0: f80002b7 lui t0,0xf8000 -800001a4: 00828293 addi t0,t0,8 # f8000008 -800001a8: f1402373 csrr t1,mhartid -800001ac: 01031313 slli t1,t1,0x10 -800001b0: 006282b3 add t0,t0,t1 -800001b4: 0082a023 sw s0,0(t0) -800001b8: 0240006f j 800001dc +80000224 : +80000224: 0130000f fence w,rw +80000228: e7dff06f j 800000a4 -800001bc : -800001bc: 00100413 li s0,1 -800001c0: f80002b7 lui t0,0xf8000 -800001c4: 00828293 addi t0,t0,8 # f8000008 -800001c8: f1402373 csrr t1,mhartid -800001cc: 01031313 slli t1,t1,0x10 -800001d0: 006282b3 add t0,t0,t1 -800001d4: 0082a023 sw s0,0(t0) -800001d8: 0040006f j 800001dc +8000022c : +8000022c: 1000006f j 8000032c -800001dc : -800001dc: 0000006f j 800001dc +80000230 : +80000230: f80002b7 lui t0,0xf8000 +80000234: 00c28293 addi t0,t0,12 # f800000c +80000238: f1402373 csrr t1,mhartid +8000023c: 01031313 slli t1,t1,0x10 +80000240: 006282b3 add t0,t0,t1 +80000244: 00a2a023 sw a0,0(t0) +80000248: 00000e97 auipc t4,0x0 +8000024c: 13ceae83 lw t4,316(t4) # 80000384 +80000250: 00000297 auipc t0,0x0 +80000254: 13028293 addi t0,t0,304 # 80000380 +80000258: 00100313 li t1,1 +8000025c: 0062a2af amoadd.w t0,t1,(t0) +80000260: 00128293 addi t0,t0,1 +80000264: 00000317 auipc t1,0x0 +80000268: 11832303 lw t1,280(t1) # 8000037c +8000026c: 00629c63 bne t0,t1,80000284 +80000270: 001e8293 addi t0,t4,1 +80000274: 00000317 auipc t1,0x0 +80000278: 10032623 sw zero,268(t1) # 80000380 +8000027c: 00000317 auipc t1,0x0 +80000280: 10532423 sw t0,264(t1) # 80000384 -800001e0 : -800001e0: fff50513 addi a0,a0,-1 -800001e4: fe051ee3 bnez a0,800001e0 -800001e8: 00008067 ret +80000284 : +80000284: 00000297 auipc t0,0x0 +80000288: 1002a283 lw t0,256(t0) # 80000384 +8000028c: ffd28ce3 beq t0,t4,80000284 +80000290: f80002b7 lui t0,0xf8000 +80000294: 01028293 addi t0,t0,16 # f8000010 +80000298: f1402373 csrr t1,mhartid +8000029c: 01031313 slli t1,t1,0x10 +800002a0: 006282b3 add t0,t0,t1 +800002a4: 00a2a023 sw a0,0(t0) +800002a8: 00008067 ret -800001ec : -800001ec: 0000 unimp +800002ac : +800002ac: f80002b7 lui t0,0xf8000 +800002b0: 00c28293 addi t0,t0,12 # f800000c +800002b4: f1402373 csrr t1,mhartid +800002b8: 01031313 slli t1,t1,0x10 +800002bc: 006282b3 add t0,t0,t1 +800002c0: 00a2a023 sw a0,0(t0) +800002c4: 00000e97 auipc t4,0x0 +800002c8: 0c0eae83 lw t4,192(t4) # 80000384 +800002cc: 00000297 auipc t0,0x0 +800002d0: 0b428293 addi t0,t0,180 # 80000380 + +800002d4 : +800002d4: 1002a32f lr.w t1,(t0) +800002d8: 00130313 addi t1,t1,1 +800002dc: 1862a3af sc.w t2,t1,(t0) +800002e0: fe039ae3 bnez t2,800002d4 +800002e4: 00000297 auipc t0,0x0 +800002e8: 0982a283 lw t0,152(t0) # 8000037c +800002ec: 00629c63 bne t0,t1,80000304 +800002f0: 001e8293 addi t0,t4,1 +800002f4: 00000317 auipc t1,0x0 +800002f8: 08032623 sw zero,140(t1) # 80000380 +800002fc: 00000317 auipc t1,0x0 +80000300: 08532423 sw t0,136(t1) # 80000384 + +80000304 : +80000304: 00000297 auipc t0,0x0 +80000308: 0802a283 lw t0,128(t0) # 80000384 +8000030c: ffd28ce3 beq t0,t4,80000304 +80000310: f80002b7 lui t0,0xf8000 +80000314: 01028293 addi t0,t0,16 # f8000010 +80000318: f1402373 csrr t1,mhartid +8000031c: 01031313 slli t1,t1,0x10 +80000320: 006282b3 add t0,t0,t1 +80000324: 00a2a023 sw a0,0(t0) +80000328: 00008067 ret + +8000032c : +8000032c: 00000413 li s0,0 +80000330: f80002b7 lui t0,0xf8000 +80000334: 00828293 addi t0,t0,8 # f8000008 +80000338: f1402373 csrr t1,mhartid +8000033c: 01031313 slli t1,t1,0x10 +80000340: 006282b3 add t0,t0,t1 +80000344: 0082a023 sw s0,0(t0) +80000348: 0240006f j 8000036c + +8000034c : +8000034c: 00100413 li s0,1 +80000350: f80002b7 lui t0,0xf8000 +80000354: 00828293 addi t0,t0,8 # f8000008 +80000358: f1402373 csrr t1,mhartid +8000035c: 01031313 slli t1,t1,0x10 +80000360: 006282b3 add t0,t0,t1 +80000364: 0082a023 sw s0,0(t0) +80000368: 0040006f j 8000036c + +8000036c : +8000036c: 0000006f j 8000036c + +80000370 : +80000370: fff50513 addi a0,a0,-1 +80000374: fe051ee3 bnez a0,80000370 +80000378: 00008067 ret + +8000037c : +8000037c: 0000 unimp ... -800001f0 : -800001f0: 0000 unimp +80000380 : +80000380: 0000 unimp ... -800001f4 : -800001f4: 0000 unimp +80000384 : +80000384: 0000 unimp + ... + +80000388 : +80000388: 1000 addi s0,sp,32 + ... + +8000038c : +8000038c: 0000 unimp + ... + +80000390 : +80000390: 0000 unimp + ... + +80000394 : +80000394: 0000 unimp +80000396: 0000 unimp +80000398: 00000013 nop +8000039c: 00000013 nop +800003a0: 00000013 nop +800003a4: 00000013 nop +800003a8: 00000013 nop +800003ac: 00000013 nop +800003b0: 00000013 nop +800003b4: 00000013 nop +800003b8: 00000013 nop +800003bc: 00000013 nop +800003c0: 00000013 nop +800003c4: 00000013 nop +800003c8: 00000013 nop +800003cc: 00000013 nop +800003d0: 00000013 nop +800003d4: 00000013 nop +800003d8: 00000013 nop +800003dc: 00000013 nop +800003e0: 00000013 nop +800003e4: 00000013 nop +800003e8: 00000013 nop +800003ec: 00000013 nop +800003f0: 00000013 nop +800003f4: 00000013 nop +800003f8: 00000013 nop +800003fc: 00000013 nop + +80000400 : +80000400: 0000 unimp + ... + +80000404 : +80000404: 0000 unimp +80000406: 0000 unimp +80000408: 00000013 nop +8000040c: 00000013 nop +80000410: 00000013 nop +80000414: 00000013 nop +80000418: 00000013 nop +8000041c: 00000013 nop +80000420: 00000013 nop +80000424: 00000013 nop +80000428: 00000013 nop +8000042c: 00000013 nop +80000430: 00000013 nop +80000434: 00000013 nop +80000438: 00000013 nop +8000043c: 00000013 nop + +80000440 : +80000440: 0000 unimp + ... + +80000444 : ... diff --git a/src/test/cpp/raw/smp/build/smp.bin b/src/test/cpp/raw/smp/build/smp.bin index 59a832f..b391a14 100755 Binary files a/src/test/cpp/raw/smp/build/smp.bin and b/src/test/cpp/raw/smp/build/smp.bin differ diff --git a/src/test/cpp/raw/smp/src/crt.S b/src/test/cpp/raw/smp/src/crt.S index 72cc5b8..4f984eb 100644 --- a/src/test/cpp/raw/smp/src/crt.S +++ b/src/test/cpp/raw/smp/src/crt.S @@ -1,9 +1,13 @@ +#define CONSISTENCY_REDO_COUNT 10 + + #define REPORT_OFFSET 0xF8000000 #define REPORT_THREAD_ID 0x00 #define REPORT_THREAD_COUNT 0x04 #define REPORT_END 0x08 #define REPORT_BARRIER_START 0x0C #define REPORT_BARRIER_END 0x10 +#define REPORT_CONSISTENCY_VALUES 0x14 #define report(reg, id) \ li t0, REPORT_OFFSET+id; \ @@ -54,6 +58,92 @@ barrier_amo_test: call barrier_lrsc + lw gp, barrier_allocator +consistancy_loop: + //Sync + mv a0, gp + addi gp, gp, 1 + call barrier_lrsc + + //all passed ? + lw t0, consistancy_all_tested + li t1, CONSISTENCY_REDO_COUNT + bge t0, t1, consistancy_passed + + //identify who is A, who is B + lw t0, consistancy_a_hart + lw t1, consistancy_b_hart + beq t0, t1, consistancy_join + csrr t0, mhartid + lw t1, consistancy_a_hart + la s0, consistancy_a_value + la s1, consistancy_b_value + beq t0, t1, consistancy_do + lw t1, consistancy_b_hart + la s0, consistancy_b_value + la s1, consistancy_a_value + beq t0, t1, consistancy_do + +consistancy_hart_not_involved: + mv a0, gp + addi gp, gp, 1 + call barrier_lrsc + j consistancy_join + +consistancy_do: + li s2, 666 + mv a0, gp + addi gp, gp, 1 + lw s3, (s1) //Help getting the cache loaded for the consistancy check + call barrier_lrsc + + //Consistancy check : write to read ordering on two thread + sw s2, (s0) + fence w,r + lw s3, (s1) + sw s3, 64(s0) + +consistancy_join: + fence rw, rw //ensure updated values + mv a0, gp + addi gp, gp, 1 + call barrier_lrsc + csrr t0, mhartid + bnez t0, consistancy_loop + +consistancy_assert: + lw t0, consistancy_a_hart + lw t1, consistancy_b_hart + beq t0, t1, consistancy_increment + lw a0, consistancy_a_readed + report(a0, REPORT_CONSISTENCY_VALUES) + lw a0, consistancy_b_readed + report(a0, REPORT_CONSISTENCY_VALUES) + +consistancy_increment: + csrr t0, mhartid + bnez t0, consistancy_loop + sw x0, (consistancy_a_value), t0 + sw x0, (consistancy_b_value), t0 + lw s0,thread_count + lw t0,consistancy_b_hart + addi t0, t0, 1 + sw t0, consistancy_b_hart, t1 + bne t0, s0, consistancy_increment_fence + sw x0, consistancy_b_hart, t1 + lw t0,consistancy_a_hart + addi t0, t0, 1 + sw t0, consistancy_a_hart, t1 + bne t0, s0, consistancy_increment_fence + sw x0, consistancy_a_hart, t1 + lw t0, consistancy_all_tested + addi t0, t0, 1 + sw t0, consistancy_all_tested, t1 +consistancy_increment_fence: + fence w, rw + j consistancy_loop + +consistancy_passed: j success @@ -78,24 +168,25 @@ barrier_amo_wait: ret barrier_lrsc: - report(a0, REPORT_BARRIER_START) - lw ENTRY_PHASE, barrier_phase - la t0, barrier_value + report(a0, REPORT_BARRIER_START) + lw ENTRY_PHASE, barrier_phase + la t0, barrier_value barrier_lrsc_try: - lr.w t1, (t0) - addi t1, t1, 1 - sc.w t2, t1, (t0) - bnez t2, barrier_lrsc_try - lw t0, thread_count - bne t0, t1, barrier_lrsc_wait - addi t0,ENTRY_PHASE,1 - sw x0, barrier_value, t1 - sw t0, barrier_phase, t1 + lr.w t1, (t0) + addi t1, t1, 1 + sc.w t2, t1, (t0) + bnez t2, barrier_lrsc_try + lw t0, thread_count + bne t0, t1, barrier_lrsc_wait + addi t0,ENTRY_PHASE,1 + sw x0, barrier_value, t1 + sw t0, barrier_phase, t1 barrier_lrsc_wait: - lw t0, barrier_phase - beq t0, ENTRY_PHASE, barrier_lrsc_wait - report(a0, REPORT_BARRIER_END) - ret + lw t0, barrier_phase + beq t0, ENTRY_PHASE, barrier_lrsc_wait + report(a0, REPORT_BARRIER_END) + ret + @@ -120,5 +211,27 @@ sleep: thread_count: .word 0 + +.align 6 //Same cache line barrier_value: .word 0 barrier_phase: .word 0 +barrier_allocator: .word 0x1000 + +consistancy_a_hart: .word 0 +consistancy_b_hart: .word 0 +consistancy_all_tested: .word 0 + + +nop;nop;nop;nop;nop;nop;nop;nop; +nop;nop;nop;nop;nop;nop;nop;nop; +.align 6 //Same cache line +consistancy_a_value: .word 0 +consistancy_b_value: .word 0 + +.align 6 //Same cache line +consistancy_b_readed: .word 0 +consistancy_a_readed: .word 0 + +.align 6 //Same cache line +consistancy_init_call: .word 0 +consistancy_do_call: .word 0 \ No newline at end of file