Add SMP consistency check; fix VexRiscv invalidation read-during-write hazard logic

This commit is contained in:
Dolu1990 2020-04-21 12:18:10 +02:00
parent 8e8b64feaa
commit b389878d23
6 changed files with 517 additions and 161 deletions

View File

@ -292,6 +292,7 @@ object VexRiscvSmpClusterTestInfrastructure{
val REPORT_END = 0x08
val REPORT_BARRIER_START = 0x0C
val REPORT_BARRIER_END = 0x10
val REPORT_CONSISTENCY_VALUES = 0x14
val PUTC = 0x00
val GETC = 0x04
@ -310,14 +311,7 @@ object VexRiscvSmpClusterTestInfrastructure{
}
}
val reports = ArrayBuffer.fill(cpuCount)(ArrayBuffer[Report]())
onSimEnd{
for((list, hart) <- reports.zipWithIndex){
println(f"\n\n**** CPU $hart%2d ****")
for((report, reportId) <- list.zipWithIndex){
println(f" $reportId%3d : ${report.code}%3x -> ${report.data}%3d")
}
}
}
val writeTable = mutable.HashMap[Int, Int => Unit]()
val readTable = mutable.HashMap[Int, () => Int]()
@ -329,6 +323,24 @@ object VexRiscvSmpClusterTestInfrastructure{
var reportWatchdog = 0
val cpuEnd = Array.fill(cpuCount)(false)
val barriers = mutable.HashMap[Int, Int]()
var consistancyCounter = 0
var consistancyLast = 0
var consistancyA = 0
var consistancyB = 0
var consistancyAB = 0
var consistancyNone = 0
onSimEnd{
for((list, hart) <- reports.zipWithIndex){
println(f"\n\n**** CPU $hart%2d ****")
for((report, reportId) <- list.zipWithIndex){
println(f" $reportId%3d : ${report.code}%3x -> ${report.data}%3d")
}
}
println(s"consistancy NONE:$consistancyNone A:$consistancyA B:$consistancyB AB:$consistancyAB")
}
override def setByte(address: Long, value: Byte): Unit = {
if((address & 0xF0000000l) != 0xF0000000l) return super.setByte(address, value)
val byteId = address & 3
@ -344,7 +356,7 @@ object VexRiscvSmpClusterTestInfrastructure{
code = (offset & 0x00FFFF).toInt,
data = writeData
)
println(report)
// println(report)
reports(report.hart) += report
reportWatchdog += 1
import report._
@ -361,6 +373,21 @@ object VexRiscvSmpClusterTestInfrastructure{
val counter = barriers.getOrElse(data, 0)
assert(counter == cpuCount)
}
// The guest sends the two observed values as two consecutive writes to this
// register: the first write is latched, the second one completes the pair and
// classifies it.
case REPORT_CONSISTENCY_VALUES => consistancyCounter match {
case 0 => {
consistancyCounter = 1
consistancyLast = data
}
case 1 => {
consistancyCounter = 0
(data, consistancyLast) match {
case (666, 0) => consistancyA += 1
case (0, 666) => consistancyB += 1
case (666, 666) => consistancyAB += 1
// Neither hart observed the other's store: write-to-read ordering was violated.
case (0,0) => consistancyNone += 1; simFailure("Consistancy issue :(")
// Any other pair means a value other than 0/666 was read back; fail with the
// offending values instead of letting a bare MatchError escape.
case (second, first) => simFailure(s"Consistancy issue : unexpected values $first, $second")
}
}
}
}
}
case _ => writeTable.get(offset.toInt) match {
@ -440,7 +467,7 @@ object VexRiscvSmpClusterTest extends App{
import spinal.core.sim._
val simConfig = SimConfig
// simConfig.withWave
simConfig.withWave
simConfig.allOptimisation
simConfig.addSimulatorFlag("--threads 1")
@ -448,7 +475,9 @@ object VexRiscvSmpClusterTest extends App{
val withStall = true
simConfig.compile(VexRiscvSmpClusterGen.vexRiscvCluster(cpuCount)).doSimUntilVoid(seed = 42){dut =>
SimTimeout(10000*10*cpuCount)
disableSimWave()
SimTimeout(100000000l*10*cpuCount)
dut.clockDomain.forkSimSpeedPrinter(1.0)
VexRiscvSmpClusterTestInfrastructure.init(dut)
val ram = VexRiscvSmpClusterTestInfrastructure.ram(dut)
ram.memory.loadBin(0x80000000l, "src/test/cpp/raw/smp/build/smp.bin")

View File

@ -102,10 +102,10 @@ case class DataCacheCpuExecute(p : DataCacheConfig) extends Bundle with IMasterS
val address = UInt(p.addressWidth bit)
val haltIt = Bool
val args = DataCacheCpuExecuteArgs(p)
val fence = Bool()
val totalyConsistent = Bool()
override def asMaster(): Unit = {
out(isValid, args, address, fence)
out(isValid, args, address, totalyConsistent)
in(haltIt)
}
}
@ -129,9 +129,10 @@ case class DataCacheCpuMemory(p : DataCacheConfig) extends Bundle with IMasterSl
val isWrite = Bool
val address = UInt(p.addressWidth bit)
val mmuBus = MemoryTranslatorBus()
val fenceValid = Bool()
override def asMaster(): Unit = {
out(isValid, isStuck, isRemoved, address)
out(isValid, isStuck, isRemoved, address, fenceValid)
in(isWrite)
slave(mmuBus)
}
@ -148,11 +149,13 @@ case class DataCacheCpuWriteBack(p : DataCacheConfig) extends Bundle with IMaste
val address = UInt(p.addressWidth bit)
val mmuException, unalignedAccess, accessError = Bool()
val keepMemRspData = Bool() //Used by external AMO to avoid having an internal buffer
val fenceValid = Bool()
val fenceFire = Bool()
// val exceptionBus = if(p.catchSomething) Flow(ExceptionCause()) else null
override def asMaster(): Unit = {
out(isValid,isStuck,isUser, address)
out(isValid,isStuck,isUser, address, fenceValid, fenceFire)
in(haltIt, data, mmuException, unalignedAccess, accessError, isWrite, keepMemRspData)
}
}
@ -514,7 +517,7 @@ class DataCache(val p : DataCacheConfig) extends Component{
val counter = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0)
counter := counter + U(io.mem.cmd.fire && io.mem.cmd.last) - U(io.mem.rsp.valid && io.mem.rsp.last)
val consistent = counter === 0
val done = counter === 0
val full = RegNext(counter.msb)
val last = counter === 1
@ -529,16 +532,26 @@ class DataCache(val p : DataCacheConfig) extends Component{
val sync = withInvalidate generate new Area{
io.mem.sync.ready := True
val counter = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0)
counter := counter + U(io.mem.cmd.fire && io.mem.cmd.wr) - U(io.mem.sync.fire)
val pendingSync = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0)
val pendingSyncNext = pendingSync + U(io.mem.cmd.fire && io.mem.cmd.wr) - U(io.mem.sync.fire)
pendingSync := pendingSyncNext
val full = RegNext(counter.msb)
val full = RegNext(pendingSync.msb)
io.cpu.execute.haltIt setWhen(full)
val consistent = counter === 0
val incoerentSync = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0)
incoerentSync := incoerentSync - U(io.mem.sync.fire && incoerentSync =/= 0)
when(io.cpu.writeBack.fenceValid){ incoerentSync := pendingSyncNext }
val totalyConsistent = pendingSync === 0
val fenceConsistent = incoerentSync === 0
}
val stage0 = new Area{
val mask = io.cpu.execute.size.mux (
U(0) -> B"0001",
@ -548,10 +561,14 @@ class DataCache(val p : DataCacheConfig) extends Component{
val dataColisions = collisionProcess(io.cpu.execute.address(lineRange.high downto wordRange.low), mask)
val wayInvalidate = B(0, wayCount bits) //Used if invalidate enabled
when(io.cpu.execute.fence){
val consistent = if(withInvalidate) sync.consistent else if(withWriteResponse) pending.consistent else null
if(consistent != null){
when(!consistent || io.cpu.memory.isValid && io.cpu.memory.isWrite || io.cpu.writeBack.isValid && io.cpu.memory.isWrite){
val isAmo = if(withAmo) io.cpu.execute.isAmo else False
//Ensure write to read consistency
val consistancyCheck = (withInvalidate || withWriteResponse) generate new Area {
val fenceConsistent = (if(withInvalidate) sync.fenceConsistent else pending.done) && !io.cpu.writeBack.fenceValid && !io.cpu.memory.fenceValid //Pessimistic fence tracking
val totalyConsistent = (if(withInvalidate) sync.totalyConsistent else pending.done) && !(io.cpu.memory.isValid && io.cpu.memory.isWrite) && !(io.cpu.writeBack.isValid && io.cpu.memory.isWrite)
when(io.cpu.execute.isValid && (!io.cpu.execute.args.wr || isAmo)){
when(!fenceConsistent || io.cpu.execute.totalyConsistent && !totalyConsistent){
io.cpu.execute.haltIt := True
}
}
@ -632,7 +649,6 @@ class DataCache(val p : DataCacheConfig) extends Component{
}
}
val lrSc = withInternalLrSc generate new Area{
val reserved = RegInit(False)
when(io.cpu.writeBack.isValid && !io.cpu.writeBack.isStuck && request.isLrsc
@ -923,7 +939,7 @@ class DataCache(val p : DataCacheConfig) extends Component{
io.mem.ack.hit := wayHit
//Manage invalidation read during write hazard
s1.invalidations := RegNext((input.valid && input.enable) ? wayHits | 0)
s1.invalidations := RegNextWhen((input.valid && input.enable && input.address(lineRange) === s0.input.address(lineRange)) ? wayHits | 0, s0.input.ready)
}
}
}

View File

@ -50,7 +50,8 @@ class DBusCachedPlugin(val config : DataCacheConfig,
object MEMORY_LRSC extends Stageable(Bool)
object MEMORY_AMO extends Stageable(Bool)
object MEMORY_FENCE extends Stageable(Bool)
object MEMORY_FENCE_DECODED extends Stageable(Bool)
object MEMORY_FENCE_FRONT extends Stageable(Bool)
object MEMORY_FENCE_BACK extends Stageable(Bool)
object IS_DBUS_SHARING extends Stageable(Bool())
object MEMORY_VIRTUAL_ADDRESS extends Stageable(UInt(32 bits))
@ -224,17 +225,22 @@ class DBusCachedPlugin(val config : DataCacheConfig,
def PS = PW || PO
}
//Manage write to read hit ordering (ensure invalidation timings)
val fence = new Area{
val hazard = False
insert(MEMORY_FENCE_FRONT) := False
insert(MEMORY_FENCE_BACK) := False
val ff = input(INSTRUCTION)(31 downto 20).as(FenceFlags())
if(withWriteResponse){
hazard setWhen(input(MEMORY_FENCE) && (ff.PS && ff.SL)) //Manage write to read hit ordering (ensure invalidation timings)
when(input(INSTRUCTION)(26 downto 25) =/= 0){
if(withLrSc) hazard setWhen(input(MEMORY_LRSC))
if(withAmo) hazard setWhen(input(MEMORY_AMO))
insert(MEMORY_FENCE_BACK) setWhen(input(MEMORY_FENCE) && (ff.PS && ff.SL))
when(input(INSTRUCTION)(26)) { //AQ
if(withLrSc) insert(MEMORY_FENCE_BACK) setWhen(input(MEMORY_LRSC))
if(withAmo) insert(MEMORY_FENCE_BACK) setWhen(input(MEMORY_AMO))
}
when(input(INSTRUCTION)(25)) { //RL but a bit pessimistic as could be MEMORY_FENCE_BACK when the memory op isn't a read
if(withLrSc) insert(MEMORY_FENCE_FRONT) setWhen(input(MEMORY_LRSC))
if(withAmo) insert(MEMORY_FENCE_FRONT) setWhen(input(MEMORY_AMO))
}
}
insert(MEMORY_FENCE_DECODED) := hazard
}
}
@ -254,7 +260,7 @@ class DBusCachedPlugin(val config : DataCacheConfig,
cache.io.cpu.flush.valid := arbitration.isValid && input(MEMORY_MANAGMENT)
cache.io.cpu.execute.fence := arbitration.isValid && input(MEMORY_FENCE_DECODED)
cache.io.cpu.execute.totalyConsistent := arbitration.isValid && input(MEMORY_FENCE_FRONT)
arbitration.haltItself setWhen(cache.io.cpu.flush.isStall || cache.io.cpu.execute.haltIt)
if(withLrSc) {
@ -296,6 +302,8 @@ class DBusCachedPlugin(val config : DataCacheConfig,
cache.io.cpu.memory.mmuBus <> mmuBus
cache.io.cpu.memory.mmuBus.rsp.isIoAccess setWhen(pipeline(DEBUG_BYPASS_CACHE) && !cache.io.cpu.memory.isWrite)
cache.io.cpu.memory.fenceValid := arbitration.isValid && input(MEMORY_FENCE_BACK)
}
val managementStage = stages.last
@ -306,6 +314,9 @@ class DBusCachedPlugin(val config : DataCacheConfig,
cache.io.cpu.writeBack.isUser := (if(privilegeService != null) privilegeService.isUser() else False)
cache.io.cpu.writeBack.address := U(input(REGFILE_WRITE_DATA))
cache.io.cpu.writeBack.fenceValid := arbitration.isValid && input(MEMORY_FENCE_BACK)
cache.io.cpu.writeBack.fenceFire := arbitration.isFiring && input(MEMORY_FENCE_BACK)
redoBranch.valid := False
redoBranch.payload := input(PC)
arbitration.flushIt setWhen(redoBranch.valid)

View File

@ -10,24 +10,24 @@ Disassembly of section .crt_section:
80000008: f1402373 csrr t1,mhartid
8000000c: 01031313 slli t1,t1,0x10
80000010: 006282b3 add t0,t0,t1
80000014: 0082a023 sw s0,0(t0) # f8000000 <barrier_phase+0x77fffe0c>
80000014: 0082a023 sw s0,0(t0) # f8000000 <consistancy_a_readed+0x77fffbbc>
80000018 <count_thread_start>:
80000018: 00100513 li a0,1
8000001c: 00000597 auipc a1,0x0
80000020: 1d058593 addi a1,a1,464 # 800001ec <thread_count>
80000020: 36058593 addi a1,a1,864 # 8000037c <thread_count>
80000024: 00a5a02f amoadd.w zero,a0,(a1)
80000028 <count_thread_wait>:
80000028: 00000417 auipc s0,0x0
8000002c: 1c442403 lw s0,452(s0) # 800001ec <thread_count>
8000002c: 35442403 lw s0,852(s0) # 8000037c <thread_count>
80000030: 19000513 li a0,400
80000034: 1ac000ef jal ra,800001e0 <sleep>
80000034: 33c000ef jal ra,80000370 <sleep>
80000038: 00000497 auipc s1,0x0
8000003c: 1b44a483 lw s1,436(s1) # 800001ec <thread_count>
8000003c: 3444a483 lw s1,836(s1) # 8000037c <thread_count>
80000040: fe8494e3 bne s1,s0,80000028 <count_thread_wait>
80000044: f80002b7 lui t0,0xf8000
80000048: 00428293 addi t0,t0,4 # f8000004 <barrier_phase+0x77fffe10>
80000048: 00428293 addi t0,t0,4 # f8000004 <consistancy_a_readed+0x77fffbc0>
8000004c: f1402373 csrr t1,mhartid
80000050: 01031313 slli t1,t1,0x10
80000054: 006282b3 add t0,t0,t1
@ -35,132 +35,319 @@ Disassembly of section .crt_section:
8000005c <barrier_amo_test>:
8000005c: 00100513 li a0,1
80000060: 040000ef jal ra,800000a0 <barrier_amo>
80000060: 1d0000ef jal ra,80000230 <barrier_amo>
80000064: 00200513 li a0,2
80000068: 038000ef jal ra,800000a0 <barrier_amo>
80000068: 1c8000ef jal ra,80000230 <barrier_amo>
8000006c: 00300513 li a0,3
80000070: 030000ef jal ra,800000a0 <barrier_amo>
80000070: 1c0000ef jal ra,80000230 <barrier_amo>
80000074: 00400513 li a0,4
80000078: 0a4000ef jal ra,8000011c <barrier_lrsc>
80000078: 234000ef jal ra,800002ac <barrier_lrsc>
8000007c: 00500513 li a0,5
80000080: 09c000ef jal ra,8000011c <barrier_lrsc>
80000080: 22c000ef jal ra,800002ac <barrier_lrsc>
80000084: 00600513 li a0,6
80000088: 094000ef jal ra,8000011c <barrier_lrsc>
80000088: 224000ef jal ra,800002ac <barrier_lrsc>
8000008c: 00700513 li a0,7
80000090: 010000ef jal ra,800000a0 <barrier_amo>
80000090: 1a0000ef jal ra,80000230 <barrier_amo>
80000094: 00800513 li a0,8
80000098: 084000ef jal ra,8000011c <barrier_lrsc>
8000009c: 1000006f j 8000019c <success>
80000098: 214000ef jal ra,800002ac <barrier_lrsc>
8000009c: 00000197 auipc gp,0x0
800000a0: 2ec1a183 lw gp,748(gp) # 80000388 <barrier_allocator>
800000a0 <barrier_amo>:
800000a0: f80002b7 lui t0,0xf8000
800000a4: 00c28293 addi t0,t0,12 # f800000c <barrier_phase+0x77fffe18>
800000a8: f1402373 csrr t1,mhartid
800000ac: 01031313 slli t1,t1,0x10
800000b0: 006282b3 add t0,t0,t1
800000b4: 00a2a023 sw a0,0(t0)
800000b8: 00000e97 auipc t4,0x0
800000bc: 13ceae83 lw t4,316(t4) # 800001f4 <barrier_phase>
800000a4 <consistancy_loop>:
800000a4: 00018513 mv a0,gp
800000a8: 00118193 addi gp,gp,1
800000ac: 200000ef jal ra,800002ac <barrier_lrsc>
800000b0: 00000297 auipc t0,0x0
800000b4: 2e42a283 lw t0,740(t0) # 80000394 <consistancy_all_tested>
800000b8: 00a00313 li t1,10
800000bc: 1662d863 bge t0,t1,8000022c <consistancy_passed>
800000c0: 00000297 auipc t0,0x0
800000c4: 13028293 addi t0,t0,304 # 800001f0 <barrier_value>
800000c8: 00100313 li t1,1
800000cc: 0062a2af amoadd.w t0,t1,(t0)
800000d0: 00128293 addi t0,t0,1
800000d4: 00000317 auipc t1,0x0
800000d8: 11832303 lw t1,280(t1) # 800001ec <thread_count>
800000dc: 00629c63 bne t0,t1,800000f4 <barrier_amo_wait>
800000e0: 001e8293 addi t0,t4,1
800000e4: 00000317 auipc t1,0x0
800000e8: 10032623 sw zero,268(t1) # 800001f0 <barrier_value>
800000ec: 00000317 auipc t1,0x0
800000f0: 10532423 sw t0,264(t1) # 800001f4 <barrier_phase>
800000c4: 2cc2a283 lw t0,716(t0) # 8000038c <consistancy_a_hart>
800000c8: 00000317 auipc t1,0x0
800000cc: 2c832303 lw t1,712(t1) # 80000390 <consistancy_b_hart>
800000d0: 06628a63 beq t0,t1,80000144 <consistancy_join>
800000d4: f14022f3 csrr t0,mhartid
800000d8: 00000317 auipc t1,0x0
800000dc: 2b432303 lw t1,692(t1) # 8000038c <consistancy_a_hart>
800000e0: 00000417 auipc s0,0x0
800000e4: 32040413 addi s0,s0,800 # 80000400 <consistancy_a_value>
800000e8: 00000497 auipc s1,0x0
800000ec: 31c48493 addi s1,s1,796 # 80000404 <consistancy_b_value>
800000f0: 02628863 beq t0,t1,80000120 <consistancy_do>
800000f4: 00000317 auipc t1,0x0
800000f8: 29c32303 lw t1,668(t1) # 80000390 <consistancy_b_hart>
800000fc: 00000417 auipc s0,0x0
80000100: 30840413 addi s0,s0,776 # 80000404 <consistancy_b_value>
80000104: 00000497 auipc s1,0x0
80000108: 2fc48493 addi s1,s1,764 # 80000400 <consistancy_a_value>
8000010c: 00628a63 beq t0,t1,80000120 <consistancy_do>
800000f4 <barrier_amo_wait>:
800000f4: 00000297 auipc t0,0x0
800000f8: 1002a283 lw t0,256(t0) # 800001f4 <barrier_phase>
800000fc: ffd28ce3 beq t0,t4,800000f4 <barrier_amo_wait>
80000100: f80002b7 lui t0,0xf8000
80000104: 01028293 addi t0,t0,16 # f8000010 <barrier_phase+0x77fffe1c>
80000108: f1402373 csrr t1,mhartid
8000010c: 01031313 slli t1,t1,0x10
80000110: 006282b3 add t0,t0,t1
80000114: 00a2a023 sw a0,0(t0)
80000118: 00008067 ret
80000110 <consistancy_hart_not_involved>:
80000110: 00018513 mv a0,gp
80000114: 00118193 addi gp,gp,1
80000118: 194000ef jal ra,800002ac <barrier_lrsc>
8000011c: 0280006f j 80000144 <consistancy_join>
8000011c <barrier_lrsc>:
8000011c: f80002b7 lui t0,0xf8000
80000120: 00c28293 addi t0,t0,12 # f800000c <barrier_phase+0x77fffe18>
80000124: f1402373 csrr t1,mhartid
80000128: 01031313 slli t1,t1,0x10
8000012c: 006282b3 add t0,t0,t1
80000130: 00a2a023 sw a0,0(t0)
80000134: 00000e97 auipc t4,0x0
80000138: 0c0eae83 lw t4,192(t4) # 800001f4 <barrier_phase>
8000013c: 00000297 auipc t0,0x0
80000140: 0b428293 addi t0,t0,180 # 800001f0 <barrier_value>
80000120 <consistancy_do>:
80000120: 29a00913 li s2,666
80000124: 00018513 mv a0,gp
80000128: 00118193 addi gp,gp,1
8000012c: 0004a983 lw s3,0(s1)
80000130: 17c000ef jal ra,800002ac <barrier_lrsc>
80000134: 01242023 sw s2,0(s0)
80000138: 0120000f fence w,r
8000013c: 0004a983 lw s3,0(s1)
80000140: 05342023 sw s3,64(s0)
80000144 <barrier_lrsc_try>:
80000144: 1002a32f lr.w t1,(t0)
80000148: 00130313 addi t1,t1,1
8000014c: 1862a3af sc.w t2,t1,(t0)
80000150: fe039ae3 bnez t2,80000144 <barrier_lrsc_try>
80000154: 00000297 auipc t0,0x0
80000158: 0982a283 lw t0,152(t0) # 800001ec <thread_count>
8000015c: 00629c63 bne t0,t1,80000174 <barrier_lrsc_wait>
80000160: 001e8293 addi t0,t4,1
80000144 <consistancy_join>:
80000144: 0330000f fence rw,rw
80000148: 00018513 mv a0,gp
8000014c: 00118193 addi gp,gp,1
80000150: 15c000ef jal ra,800002ac <barrier_lrsc>
80000154: f14022f3 csrr t0,mhartid
80000158: f40296e3 bnez t0,800000a4 <consistancy_loop>
8000015c <consistancy_assert>:
8000015c: 00000297 auipc t0,0x0
80000160: 2302a283 lw t0,560(t0) # 8000038c <consistancy_a_hart>
80000164: 00000317 auipc t1,0x0
80000168: 08032623 sw zero,140(t1) # 800001f0 <barrier_value>
8000016c: 00000317 auipc t1,0x0
80000170: 08532423 sw t0,136(t1) # 800001f4 <barrier_phase>
80000168: 22c32303 lw t1,556(t1) # 80000390 <consistancy_b_hart>
8000016c: 04628263 beq t0,t1,800001b0 <consistancy_increment>
80000170: 00000517 auipc a0,0x0
80000174: 2d452503 lw a0,724(a0) # 80000444 <consistancy_a_readed>
80000178: f80002b7 lui t0,0xf8000
8000017c: 01428293 addi t0,t0,20 # f8000014 <consistancy_a_readed+0x77fffbd0>
80000180: f1402373 csrr t1,mhartid
80000184: 01031313 slli t1,t1,0x10
80000188: 006282b3 add t0,t0,t1
8000018c: 00a2a023 sw a0,0(t0)
80000190: 00000517 auipc a0,0x0
80000194: 2b052503 lw a0,688(a0) # 80000440 <consistancy_b_readed>
80000198: f80002b7 lui t0,0xf8000
8000019c: 01428293 addi t0,t0,20 # f8000014 <consistancy_a_readed+0x77fffbd0>
800001a0: f1402373 csrr t1,mhartid
800001a4: 01031313 slli t1,t1,0x10
800001a8: 006282b3 add t0,t0,t1
800001ac: 00a2a023 sw a0,0(t0)
80000174 <barrier_lrsc_wait>:
80000174: 00000297 auipc t0,0x0
80000178: 0802a283 lw t0,128(t0) # 800001f4 <barrier_phase>
8000017c: ffd28ce3 beq t0,t4,80000174 <barrier_lrsc_wait>
80000180: f80002b7 lui t0,0xf8000
80000184: 01028293 addi t0,t0,16 # f8000010 <barrier_phase+0x77fffe1c>
80000188: f1402373 csrr t1,mhartid
8000018c: 01031313 slli t1,t1,0x10
80000190: 006282b3 add t0,t0,t1
80000194: 00a2a023 sw a0,0(t0)
80000198: 00008067 ret
800001b0 <consistancy_increment>:
800001b0: f14022f3 csrr t0,mhartid
800001b4: ee0298e3 bnez t0,800000a4 <consistancy_loop>
800001b8: 00000297 auipc t0,0x0
800001bc: 2402a423 sw zero,584(t0) # 80000400 <consistancy_a_value>
800001c0: 00000297 auipc t0,0x0
800001c4: 2402a223 sw zero,580(t0) # 80000404 <consistancy_b_value>
800001c8: 00000417 auipc s0,0x0
800001cc: 1b442403 lw s0,436(s0) # 8000037c <thread_count>
800001d0: 00000297 auipc t0,0x0
800001d4: 1c02a283 lw t0,448(t0) # 80000390 <consistancy_b_hart>
800001d8: 00128293 addi t0,t0,1
800001dc: 00000317 auipc t1,0x0
800001e0: 1a532a23 sw t0,436(t1) # 80000390 <consistancy_b_hart>
800001e4: 04829063 bne t0,s0,80000224 <consistancy_increment_fence>
800001e8: 00000317 auipc t1,0x0
800001ec: 1a032423 sw zero,424(t1) # 80000390 <consistancy_b_hart>
800001f0: 00000297 auipc t0,0x0
800001f4: 19c2a283 lw t0,412(t0) # 8000038c <consistancy_a_hart>
800001f8: 00128293 addi t0,t0,1
800001fc: 00000317 auipc t1,0x0
80000200: 18532823 sw t0,400(t1) # 8000038c <consistancy_a_hart>
80000204: 02829063 bne t0,s0,80000224 <consistancy_increment_fence>
80000208: 00000317 auipc t1,0x0
8000020c: 18032223 sw zero,388(t1) # 8000038c <consistancy_a_hart>
80000210: 00000297 auipc t0,0x0
80000214: 1842a283 lw t0,388(t0) # 80000394 <consistancy_all_tested>
80000218: 00128293 addi t0,t0,1
8000021c: 00000317 auipc t1,0x0
80000220: 16532c23 sw t0,376(t1) # 80000394 <consistancy_all_tested>
8000019c <success>:
8000019c: 00000413 li s0,0
800001a0: f80002b7 lui t0,0xf8000
800001a4: 00828293 addi t0,t0,8 # f8000008 <barrier_phase+0x77fffe14>
800001a8: f1402373 csrr t1,mhartid
800001ac: 01031313 slli t1,t1,0x10
800001b0: 006282b3 add t0,t0,t1
800001b4: 0082a023 sw s0,0(t0)
800001b8: 0240006f j 800001dc <end>
80000224 <consistancy_increment_fence>:
80000224: 0130000f fence w,rw
80000228: e7dff06f j 800000a4 <consistancy_loop>
800001bc <failure>:
800001bc: 00100413 li s0,1
800001c0: f80002b7 lui t0,0xf8000
800001c4: 00828293 addi t0,t0,8 # f8000008 <barrier_phase+0x77fffe14>
800001c8: f1402373 csrr t1,mhartid
800001cc: 01031313 slli t1,t1,0x10
800001d0: 006282b3 add t0,t0,t1
800001d4: 0082a023 sw s0,0(t0)
800001d8: 0040006f j 800001dc <end>
8000022c <consistancy_passed>:
8000022c: 1000006f j 8000032c <success>
800001dc <end>:
800001dc: 0000006f j 800001dc <end>
80000230 <barrier_amo>:
80000230: f80002b7 lui t0,0xf8000
80000234: 00c28293 addi t0,t0,12 # f800000c <consistancy_a_readed+0x77fffbc8>
80000238: f1402373 csrr t1,mhartid
8000023c: 01031313 slli t1,t1,0x10
80000240: 006282b3 add t0,t0,t1
80000244: 00a2a023 sw a0,0(t0)
80000248: 00000e97 auipc t4,0x0
8000024c: 13ceae83 lw t4,316(t4) # 80000384 <barrier_phase>
80000250: 00000297 auipc t0,0x0
80000254: 13028293 addi t0,t0,304 # 80000380 <barrier_value>
80000258: 00100313 li t1,1
8000025c: 0062a2af amoadd.w t0,t1,(t0)
80000260: 00128293 addi t0,t0,1
80000264: 00000317 auipc t1,0x0
80000268: 11832303 lw t1,280(t1) # 8000037c <thread_count>
8000026c: 00629c63 bne t0,t1,80000284 <barrier_amo_wait>
80000270: 001e8293 addi t0,t4,1
80000274: 00000317 auipc t1,0x0
80000278: 10032623 sw zero,268(t1) # 80000380 <barrier_value>
8000027c: 00000317 auipc t1,0x0
80000280: 10532423 sw t0,264(t1) # 80000384 <barrier_phase>
800001e0 <sleep>:
800001e0: fff50513 addi a0,a0,-1
800001e4: fe051ee3 bnez a0,800001e0 <sleep>
800001e8: 00008067 ret
80000284 <barrier_amo_wait>:
80000284: 00000297 auipc t0,0x0
80000288: 1002a283 lw t0,256(t0) # 80000384 <barrier_phase>
8000028c: ffd28ce3 beq t0,t4,80000284 <barrier_amo_wait>
80000290: f80002b7 lui t0,0xf8000
80000294: 01028293 addi t0,t0,16 # f8000010 <consistancy_a_readed+0x77fffbcc>
80000298: f1402373 csrr t1,mhartid
8000029c: 01031313 slli t1,t1,0x10
800002a0: 006282b3 add t0,t0,t1
800002a4: 00a2a023 sw a0,0(t0)
800002a8: 00008067 ret
800001ec <thread_count>:
800001ec: 0000 unimp
800002ac <barrier_lrsc>:
800002ac: f80002b7 lui t0,0xf8000
800002b0: 00c28293 addi t0,t0,12 # f800000c <consistancy_a_readed+0x77fffbc8>
800002b4: f1402373 csrr t1,mhartid
800002b8: 01031313 slli t1,t1,0x10
800002bc: 006282b3 add t0,t0,t1
800002c0: 00a2a023 sw a0,0(t0)
800002c4: 00000e97 auipc t4,0x0
800002c8: 0c0eae83 lw t4,192(t4) # 80000384 <barrier_phase>
800002cc: 00000297 auipc t0,0x0
800002d0: 0b428293 addi t0,t0,180 # 80000380 <barrier_value>
800002d4 <barrier_lrsc_try>:
800002d4: 1002a32f lr.w t1,(t0)
800002d8: 00130313 addi t1,t1,1
800002dc: 1862a3af sc.w t2,t1,(t0)
800002e0: fe039ae3 bnez t2,800002d4 <barrier_lrsc_try>
800002e4: 00000297 auipc t0,0x0
800002e8: 0982a283 lw t0,152(t0) # 8000037c <thread_count>
800002ec: 00629c63 bne t0,t1,80000304 <barrier_lrsc_wait>
800002f0: 001e8293 addi t0,t4,1
800002f4: 00000317 auipc t1,0x0
800002f8: 08032623 sw zero,140(t1) # 80000380 <barrier_value>
800002fc: 00000317 auipc t1,0x0
80000300: 08532423 sw t0,136(t1) # 80000384 <barrier_phase>
80000304 <barrier_lrsc_wait>:
80000304: 00000297 auipc t0,0x0
80000308: 0802a283 lw t0,128(t0) # 80000384 <barrier_phase>
8000030c: ffd28ce3 beq t0,t4,80000304 <barrier_lrsc_wait>
80000310: f80002b7 lui t0,0xf8000
80000314: 01028293 addi t0,t0,16 # f8000010 <consistancy_a_readed+0x77fffbcc>
80000318: f1402373 csrr t1,mhartid
8000031c: 01031313 slli t1,t1,0x10
80000320: 006282b3 add t0,t0,t1
80000324: 00a2a023 sw a0,0(t0)
80000328: 00008067 ret
8000032c <success>:
8000032c: 00000413 li s0,0
80000330: f80002b7 lui t0,0xf8000
80000334: 00828293 addi t0,t0,8 # f8000008 <consistancy_a_readed+0x77fffbc4>
80000338: f1402373 csrr t1,mhartid
8000033c: 01031313 slli t1,t1,0x10
80000340: 006282b3 add t0,t0,t1
80000344: 0082a023 sw s0,0(t0)
80000348: 0240006f j 8000036c <end>
8000034c <failure>:
8000034c: 00100413 li s0,1
80000350: f80002b7 lui t0,0xf8000
80000354: 00828293 addi t0,t0,8 # f8000008 <consistancy_a_readed+0x77fffbc4>
80000358: f1402373 csrr t1,mhartid
8000035c: 01031313 slli t1,t1,0x10
80000360: 006282b3 add t0,t0,t1
80000364: 0082a023 sw s0,0(t0)
80000368: 0040006f j 8000036c <end>
8000036c <end>:
8000036c: 0000006f j 8000036c <end>
80000370 <sleep>:
80000370: fff50513 addi a0,a0,-1
80000374: fe051ee3 bnez a0,80000370 <sleep>
80000378: 00008067 ret
8000037c <thread_count>:
8000037c: 0000 unimp
...
800001f0 <barrier_value>:
800001f0: 0000 unimp
80000380 <barrier_value>:
80000380: 0000 unimp
...
800001f4 <barrier_phase>:
800001f4: 0000 unimp
80000384 <barrier_phase>:
80000384: 0000 unimp
...
80000388 <barrier_allocator>:
80000388: 1000 addi s0,sp,32
...
8000038c <consistancy_a_hart>:
8000038c: 0000 unimp
...
80000390 <consistancy_b_hart>:
80000390: 0000 unimp
...
80000394 <consistancy_all_tested>:
80000394: 0000 unimp
80000396: 0000 unimp
80000398: 00000013 nop
8000039c: 00000013 nop
800003a0: 00000013 nop
800003a4: 00000013 nop
800003a8: 00000013 nop
800003ac: 00000013 nop
800003b0: 00000013 nop
800003b4: 00000013 nop
800003b8: 00000013 nop
800003bc: 00000013 nop
800003c0: 00000013 nop
800003c4: 00000013 nop
800003c8: 00000013 nop
800003cc: 00000013 nop
800003d0: 00000013 nop
800003d4: 00000013 nop
800003d8: 00000013 nop
800003dc: 00000013 nop
800003e0: 00000013 nop
800003e4: 00000013 nop
800003e8: 00000013 nop
800003ec: 00000013 nop
800003f0: 00000013 nop
800003f4: 00000013 nop
800003f8: 00000013 nop
800003fc: 00000013 nop
80000400 <consistancy_a_value>:
80000400: 0000 unimp
...
80000404 <consistancy_b_value>:
80000404: 0000 unimp
80000406: 0000 unimp
80000408: 00000013 nop
8000040c: 00000013 nop
80000410: 00000013 nop
80000414: 00000013 nop
80000418: 00000013 nop
8000041c: 00000013 nop
80000420: 00000013 nop
80000424: 00000013 nop
80000428: 00000013 nop
8000042c: 00000013 nop
80000430: 00000013 nop
80000434: 00000013 nop
80000438: 00000013 nop
8000043c: 00000013 nop
80000440 <consistancy_b_readed>:
80000440: 0000 unimp
...
80000444 <consistancy_a_readed>:
...

Binary file not shown.

View File

@ -1,9 +1,13 @@
#define CONSISTENCY_REDO_COUNT 10
#define REPORT_OFFSET 0xF8000000
#define REPORT_THREAD_ID 0x00
#define REPORT_THREAD_COUNT 0x04
#define REPORT_END 0x08
#define REPORT_BARRIER_START 0x0C
#define REPORT_BARRIER_END 0x10
#define REPORT_CONSISTENCY_VALUES 0x14
#define report(reg, id) \
li t0, REPORT_OFFSET+id; \
@ -54,6 +58,92 @@ barrier_amo_test:
call barrier_lrsc
lw gp, barrier_allocator
// One iteration of the write-to-read consistency test.
// gp holds the next barrier id and is bumped before each barrier_lrsc call.
consistancy_loop:
// Sync the harts before looking at the shared test state
mv a0, gp
addi gp, gp, 1
call barrier_lrsc
// All passed ? (consistancy_all_tested has reached CONSISTENCY_REDO_COUNT)
lw t0, consistancy_all_tested
li t1, CONSISTENCY_REDO_COUNT
bge t0, t1, consistancy_passed
// Identify who is A and who is B; nothing to test when both roles name the same hart
lw t0, consistancy_a_hart
lw t1, consistancy_b_hart
beq t0, t1, consistancy_join
csrr t0, mhartid
// If this hart is A : s0 = slot it writes (a_value), s1 = slot it reads (b_value)
lw t1, consistancy_a_hart
la s0, consistancy_a_value
la s1, consistancy_b_value
beq t0, t1, consistancy_do
// If this hart is B : same thing with the two slots swapped
lw t1, consistancy_b_hart
la s0, consistancy_b_value
la s1, consistancy_a_value
beq t0, t1, consistancy_do
// Neither A nor B : fall through to consistancy_hart_not_involved
// Harts that are neither A nor B only take the barrier that consistancy_do also
// takes, so that every hart goes through the same barriers in each iteration.
consistancy_hart_not_involved:
mv a0, gp
addi gp, gp, 1
call barrier_lrsc
j consistancy_join
// Executed by hart A and hart B. On entry s0 = address of this hart's own value
// slot and s1 = address of the other hart's slot (set up in consistancy_loop).
// Falls through into consistancy_join.
consistancy_do:
li s2, 666
mv a0, gp
addi gp, gp, 1
lw s3, (s1) //Read the other slot before the barrier to help get the cache loaded for the consistency check
call barrier_lrsc
//Consistency check : write to read ordering between two harts.
//Each hart stores 666 to its own slot, fences, then reads the other hart's slot;
//the testbench fails the simulation when both harts read 0.
sw s2, (s0)
fence w,r
lw s3, (s1)
//Publish the observed value 64 bytes after the own slot
//(consistancy_b_readed for hart A, consistancy_a_readed for hart B)
sw s3, 64(s0)
// Common rendezvous at the end of an iteration.
// Only hart 0 continues to consistancy_assert; the others restart the loop.
consistancy_join:
fence rw, rw //Ensure updated values
mv a0, gp
addi gp, gp, 1
call barrier_lrsc
csrr t0, mhartid
bnez t0, consistancy_loop
// Hart 0 only : report the two observed values to the testbench as two
// consecutive REPORT_CONSISTENCY_VALUES writes, consistancy_a_readed first,
// then consistancy_b_readed. Skipped when A and B name the same hart.
consistancy_assert:
lw t0, consistancy_a_hart
lw t1, consistancy_b_hart
beq t0, t1, consistancy_increment
lw a0, consistancy_a_readed
report(a0, REPORT_CONSISTENCY_VALUES)
lw a0, consistancy_b_readed
report(a0, REPORT_CONSISTENCY_VALUES)
// Hart 0 only : clear both value slots and advance to the next (A, B) pair.
// consistancy_b_hart counts up to thread_count, then wraps to 0 and carries into
// consistancy_a_hart; when that one wraps too, consistancy_all_tested is incremented.
consistancy_increment:
// NOTE(review): this hart check looks redundant, consistancy_join already sent
// the other harts back to consistancy_loop - confirm
csrr t0, mhartid
bnez t0, consistancy_loop
// Reset the two value slots to 0 for the next run
sw x0, (consistancy_a_value), t0
sw x0, (consistancy_b_value), t0
lw s0,thread_count
// b_hart += 1, done unless it reached thread_count
lw t0,consistancy_b_hart
addi t0, t0, 1
sw t0, consistancy_b_hart, t1
bne t0, s0, consistancy_increment_fence
// b_hart wrapped : reset it and a_hart += 1, done unless it reached thread_count
sw x0, consistancy_b_hart, t1
lw t0,consistancy_a_hart
addi t0, t0, 1
sw t0, consistancy_a_hart, t1
bne t0, s0, consistancy_increment_fence
// a_hart wrapped as well : reset it and count one more completed sweep
sw x0, consistancy_a_hart, t1
lw t0, consistancy_all_tested
addi t0, t0, 1
sw t0, consistancy_all_tested, t1
// Order the stores made in consistancy_increment before the following loads and
// stores, then go back to the barrier at the top of the loop.
consistancy_increment_fence:
fence w, rw
j consistancy_loop
// Reached from consistancy_loop once consistancy_all_tested >= CONSISTENCY_REDO_COUNT
consistancy_passed:
j success
@ -78,24 +168,25 @@ barrier_amo_wait:
ret
barrier_lrsc:
report(a0, REPORT_BARRIER_START)
lw ENTRY_PHASE, barrier_phase
la t0, barrier_value
report(a0, REPORT_BARRIER_START)
lw ENTRY_PHASE, barrier_phase
la t0, barrier_value
barrier_lrsc_try:
lr.w t1, (t0)
addi t1, t1, 1
sc.w t2, t1, (t0)
bnez t2, barrier_lrsc_try
lw t0, thread_count
bne t0, t1, barrier_lrsc_wait
addi t0,ENTRY_PHASE,1
sw x0, barrier_value, t1
sw t0, barrier_phase, t1
lr.w t1, (t0)
addi t1, t1, 1
sc.w t2, t1, (t0)
bnez t2, barrier_lrsc_try
lw t0, thread_count
bne t0, t1, barrier_lrsc_wait
addi t0,ENTRY_PHASE,1
sw x0, barrier_value, t1
sw t0, barrier_phase, t1
barrier_lrsc_wait:
lw t0, barrier_phase
beq t0, ENTRY_PHASE, barrier_lrsc_wait
report(a0, REPORT_BARRIER_END)
ret
lw t0, barrier_phase
beq t0, ENTRY_PHASE, barrier_lrsc_wait
report(a0, REPORT_BARRIER_END)
ret
@ -120,5 +211,27 @@ sleep:
thread_count: .word 0
.align 6 //Same cache line
barrier_value: .word 0
barrier_phase: .word 0
barrier_allocator: .word 0x1000
consistancy_a_hart: .word 0
consistancy_b_hart: .word 0
consistancy_all_tested: .word 0
nop;nop;nop;nop;nop;nop;nop;nop;
nop;nop;nop;nop;nop;nop;nop;nop;
.align 6 //Same cache line
consistancy_a_value: .word 0
consistancy_b_value: .word 0
.align 6 //Same cache line
consistancy_b_readed: .word 0
consistancy_a_readed: .word 0
.align 6 //Same cache line
consistancy_init_call: .word 0
consistancy_do_call: .word 0