Add hardware AMO, require AMO=yes in sim and withAmo=true in linux.scala

This commit is contained in:
Charles Papon 2019-04-09 01:22:32 +02:00
parent 21cb8615fd
commit fd42e7701e
5 changed files with 196 additions and 80 deletions

View File

@ -62,6 +62,16 @@ object Riscv{
def LR = M"00010--00000-----010-----0101111"
def SC = M"00011------------010-----0101111"
def AMOSWAP = M"00001------------010-----0101111"
def AMOADD = M"00000------------010-----0101111"
def AMOXOR = M"00100------------010-----0101111"
def AMOAND = M"01100------------010-----0101111"
def AMOOR = M"01000------------010-----0101111"
def AMOMIN = M"10000------------010-----0101111"
def AMOMAX = M"10100------------010-----0101111"
def AMOMINU = M"11000------------010-----0101111"
def AMOMAXU = M"11100------------010-----0101111"
def BEQ (rvc : Boolean) = if(rvc) M"-----------------000-----1100011" else M"-----------------000---0-1100011"
def BNE (rvc : Boolean) = if(rvc) M"-----------------001-----1100011" else M"-----------------001---0-1100011"
def BLT (rvc : Boolean) = if(rvc) M"-----------------100-----1100011" else M"-----------------100---0-1100011"

View File

@ -21,7 +21,8 @@ case class DataCacheConfig(cacheSize : Int,
earlyWaysHits : Boolean = true,
earlyDataMux : Boolean = false,
tagSizeShift : Int = 0, //Used to force infering ram
withLrSc : Boolean = false){
withLrSc : Boolean = false,
withAmo : Boolean = false){
assert(!(earlyDataMux && !earlyWaysHits))
def burstSize = bytePerLine*8/memDataWidth
@ -83,12 +84,15 @@ case class DataCacheCpuExecute(p : DataCacheConfig) extends Bundle with IMasterS
case class DataCacheCpuExecuteArgs(p : DataCacheConfig) extends Bundle{
val wr = Bool
//val address = UInt(p.addressWidth bit) Given on the side, as it's also part of the main pipeline
val data = Bits(p.cpuDataWidth bit)
val size = UInt(2 bits)
val forceUncachedAccess = Bool
val isAtomic = ifGen(p.withLrSc){Bool}
// val all = Bool //Address should be zero when "all" is used
val forceUncachedAccess = Bool()
val isLrsc = p.withLrSc generate Bool()
val isAmo = p.withAmo generate Bool()
val amoCtrl = p.withAmo generate new Bundle {
val swap = Bool()
val alu = Bits(3 bits)
}
}
case class DataCacheCpuMemory(p : DataCacheConfig) extends Bundle with IMasterSlave{
@ -114,14 +118,14 @@ case class DataCacheCpuWriteBack(p : DataCacheConfig) extends Bundle with IMaste
val data = Bits(p.cpuDataWidth bit)
val address = UInt(p.addressWidth bit)
val mmuException, unalignedAccess , accessError = Bool
val clearAtomicEntries = ifGen(p.withLrSc) {Bool}
val clearLrsc = ifGen(p.withLrSc) {Bool}
// val exceptionBus = if(p.catchSomething) Flow(ExceptionCause()) else null
override def asMaster(): Unit = {
out(isValid,isStuck,isUser, address)
in(haltIt, data, mmuException, unalignedAccess, accessError, isWrite)
outWithNull(clearAtomicEntries)
outWithNull(clearLrsc)
}
}
@ -347,9 +351,6 @@ class DataCache(p : DataCacheConfig) extends Component{
io.mem.cmd.valid := False
io.mem.cmd.payload.assignDontCare()
val ways = for(i <- 0 until wayCount) yield new Area{
val tags = Mem(new LineInfo(), wayLineCount)
val data = Mem(Bits(wordWidth bit), wayWordCount)
@ -465,78 +466,104 @@ class DataCache(p : DataCacheConfig) extends Component{
}
val atomic = withLrSc generate new Area{
case class AtomicEntry() extends Bundle{
val valid = Bool()
val address = UInt(addressWidth bits)
def init: this.type ={
valid init(False)
this
}
}
val lrsc = withLrSc generate new Area{
val reserved = RegInit(False)
when(io.cpu.writeBack.isValid && !io.cpu.writeBack.isStuck && !io.cpu.redo && request.isAtomic && !request.wr){
when(io.cpu.writeBack.isValid && !io.cpu.writeBack.isStuck && !io.cpu.redo && request.isLrsc && !request.wr){
reserved := True
}
when(io.cpu.writeBack.clearAtomicEntries){
when(io.cpu.writeBack.clearLrsc){
reserved := False
}
}
val memCmdSent = RegInit(False) setWhen (io.mem.cmd.ready) clearWhen (!io.cpu.writeBack.isStuck)
val requestDataBypass = CombInit(request.data)
val isAmo = if(withAmo) request.isAmo else False
// AMO arithmetic unit, only elaborated when the cache is configured with withAmo.
// It combines the register-file operand carried by the request (rf) with the
// value selected by dataMux (mem) and registers the value to be written back to memory.
// NOTE(review): dataMux is presumably the word read from the cache line - confirm against its definition.
val amo = withAmo generate new Area{
def rf = request.data
def mem = dataMux
// alu MSB set => comparison operation (alu = 1xx); addSub then performs rf - mem instead of rf + mem.
val compare = request.amoCtrl.alu.msb
// alu = 11x => the comparison is unsigned.
val unsigned = request.amoCtrl.alu(2 downto 1) === B"11"
// rf + mem, or rf + ~mem + 1 (two's complement subtraction) when compare is set.
val addSub = (rf.asSInt + Mux(compare, ~mem, mem).asSInt + Mux(compare, S(1), S(0))).asBits
// rf < mem: with equal top bits the sign of rf - mem decides; with different top bits,
// the operand holding the set top bit is the larger one when unsigned (mem.msb) and the smaller one when signed (rf.msb).
val less = Mux(rf.msb === mem.msb, addSub.msb, Mux(unsigned, mem.msb, rf.msb))
// swap always stores rf; otherwise alu.lsb inverts the choice (lsb = 0 keeps the lesser operand, lsb = 1 the greater one).
val selectRf = request.amoCtrl.swap ? True | (request.amoCtrl.alu.lsb ^ less)
// swap is ORed into the selector MSB, so a swap request always lands in the default branch together with alu = 1xx.
val result = (request.amoCtrl.alu | (request.amoCtrl.swap ## B"00")).mux(
B"000" -> addSub,
B"001" -> (rf ^ mem),
B"010" -> (rf | mem),
B"011" -> (rf & mem),
default -> (selectRf ? rf | mem)
)
// Reads True only when writeBack was stuck during the previous cycle, i.e. once resultReg had a cycle to capture result.
val resultRegValid = RegNext(True) clearWhen(!io.cpu.writeBack.isStuck)
// One-cycle delayed copy of result.
val resultReg = RegNext(result)
}
val memCmdSent = RegInit(False) setWhen (io.mem.cmd.ready) clearWhen (!io.cpu.writeBack.isStuck)
io.cpu.redo := False
io.cpu.writeBack.accessError := False
io.cpu.writeBack.mmuException := io.cpu.writeBack.isValid && (if(catchIllegal) mmuRsp.exception || (!mmuRsp.allowWrite && request.wr) || (!mmuRsp.allowRead && !request.wr) else False)
io.cpu.writeBack.mmuException := io.cpu.writeBack.isValid && (if(catchIllegal) mmuRsp.exception || (!mmuRsp.allowWrite && request.wr) || (!mmuRsp.allowRead && (!request.wr || isAmo)) else False)
io.cpu.writeBack.unalignedAccess := io.cpu.writeBack.isValid && (if(catchUnaligned) ((request.size === 2 && mmuRsp.physicalAddress(1 downto 0) =/= 0) || (request.size === 1 && mmuRsp.physicalAddress(0 downto 0) =/= 0)) else False)
io.cpu.writeBack.isWrite := request.wr
io.mem.cmd.valid := False
io.mem.cmd.address.assignDontCare()
io.mem.cmd.length.assignDontCare()
io.mem.cmd.last.assignDontCare()
io.mem.cmd.wr := request.wr
io.mem.cmd.mask := mask
io.mem.cmd.data := requestDataBypass
when(io.cpu.writeBack.isValid) {
when(request.forceUncachedAccess || mmuRsp.isIoAccess) {
io.cpu.writeBack.haltIt.clearWhen(request.wr ? io.mem.cmd.ready | io.mem.rsp.valid)
io.mem.cmd.valid := !memCmdSent
io.mem.cmd.wr := request.wr
io.mem.cmd.address := mmuRsp.physicalAddress(tagRange.high downto wordRange.low) @@ U(0, wordRange.low bit)
io.mem.cmd.mask := mask
io.mem.cmd.data := request.data
io.mem.cmd.length := 0
io.mem.cmd.last := True
if(withLrSc) when(request.isAtomic && !atomic.reserved){
if(withLrSc) when(request.isLrsc && !lrsc.reserved){
io.mem.cmd.valid := False
io.cpu.writeBack.haltIt := False
}
} otherwise {
when(waysHit || request.wr) { //Do not require a cache refill ?
when(waysHit || request.wr && !isAmo) { //Do not require a cache refill ?
//Data cache update
dataWriteCmd.valid setWhen(request.wr && waysHit)
dataWriteCmd.address := mmuRsp.physicalAddress(lineRange.high downto wordRange.low)
dataWriteCmd.data := request.data
dataWriteCmd.data := requestDataBypass
dataWriteCmd.mask := mask
dataWriteCmd.way := waysHits
//Write through
io.mem.cmd.valid setWhen(request.wr)
io.mem.cmd.wr := True
io.mem.cmd.address := mmuRsp.physicalAddress(tagRange.high downto wordRange.low) @@ U(0, wordRange.low bit)
io.mem.cmd.mask := mask
io.mem.cmd.data := request.data
io.mem.cmd.length := 0
io.mem.cmd.last := True
io.cpu.writeBack.haltIt clearWhen(!request.wr || io.mem.cmd.ready)
//On write to read colisions
io.cpu.redo := !request.wr && (colisions & waysHits) =/= 0
if(withAmo) when(isAmo){
when(!amo.resultRegValid) {
io.mem.cmd.valid := False
dataWriteCmd.valid := False
io.cpu.writeBack.haltIt := True
}
}
if(withLrSc) when(request.isAtomic && !atomic.reserved){
//On write to read colisions
when((!request.wr || isAmo) && (colisions & waysHits) =/= 0){
io.cpu.redo := True
if(withAmo) io.mem.cmd.valid := False
}
if(withLrSc) when(request.isLrsc && !lrsc.reserved){
io.mem.cmd.valid := False
dataWriteCmd.valid := False
io.cpu.writeBack.haltIt := False
}
} otherwise { //Do refill
//Emit cmd
io.mem.cmd.valid setWhen(!memCmdSent)
io.mem.cmd.wr := False
@ -570,8 +597,13 @@ class DataCache(p : DataCacheConfig) extends Component{
assert(!(io.cpu.writeBack.isValid && !io.cpu.writeBack.haltIt && io.cpu.writeBack.isStuck), "writeBack stuck by another plugin is not allowed")
if(withLrSc){
when(request.isAtomic && request.wr){
io.cpu.writeBack.data := (!atomic.reserved).asBits.resized
when(request.isLrsc && request.wr){
io.cpu.writeBack.data := (!lrsc.reserved).asBits.resized
}
}
if(withAmo){
when(request.isAmo){
requestDataBypass := amo.resultReg
}
}
}

View File

@ -46,7 +46,8 @@ class DBusCachedPlugin(config : DataCacheConfig,
object MEMORY_MANAGMENT extends Stageable(Bool)
object MEMORY_WR extends Stageable(Bool)
object MEMORY_ADDRESS_LOW extends Stageable(UInt(2 bits))
object MEMORY_ATOMIC extends Stageable(Bool)
object MEMORY_LRSC extends Stageable(Bool)
object MEMORY_AMO extends Stageable(Bool)
object IS_DBUS_SHARING extends Stageable(Bool())
override def setup(pipeline: VexRiscv): Unit = {
@ -85,13 +86,13 @@ class DBusCachedPlugin(config : DataCacheConfig,
if(withLrSc){
List(LB, LH, LW, LBU, LHU, LWU, SB, SH, SW).foreach(e =>
decoderService.add(e, Seq(MEMORY_ATOMIC -> False))
decoderService.add(e, Seq(MEMORY_LRSC -> False))
)
decoderService.add(
key = LR,
values = loadActions.filter(_._1 != SRC2_CTRL) ++ Seq(
SRC_ADD_ZERO -> True,
MEMORY_ATOMIC -> True
MEMORY_LRSC -> True
)
)
decoderService.add(
@ -101,11 +102,37 @@ class DBusCachedPlugin(config : DataCacheConfig,
REGFILE_WRITE_VALID -> True,
BYPASSABLE_EXECUTE_STAGE -> False,
BYPASSABLE_MEMORY_STAGE -> False,
MEMORY_ATOMIC -> True
MEMORY_LRSC -> True
)
)
}
// Decoder entries for the atomic memory operations; generated only when AMO support is enabled.
if(withAmo){
  // Ordinary loads and stores must decode MEMORY_AMO as False.
  val plainAccesses = List(LB, LH, LW, LBU, LHU, LWU, SB, SH, SW)
  for(access <- plainAccesses){
    decoderService.add(access, Seq(MEMORY_AMO -> False))
  }

  // An AMO decodes like a store (minus its SRC2_CTRL entry) that also writes the
  // register file and cannot be bypassed from the execute or memory stage.
  val amoActions = storeActions.filterNot(_._1 == SRC2_CTRL) ++ Seq(
    SRC_ADD_ZERO -> True,
    REGFILE_WRITE_VALID -> True,
    BYPASSABLE_EXECUTE_STAGE -> False,
    BYPASSABLE_MEMORY_STAGE -> False,
    MEMORY_AMO -> True
  )
  List(AMOSWAP, AMOADD, AMOXOR, AMOAND, AMOOR, AMOMIN, AMOMAX, AMOMINU, AMOMAXU).foreach(op =>
    decoderService.add(op, amoActions)
  )
}
// When both atomic flavours are enabled, each family must explicitly decode the
// other family's flag as False.
if(withAmo && withLrSc){
  val amoOps = List(AMOSWAP, AMOADD, AMOXOR, AMOAND, AMOOR, AMOMIN, AMOMAX, AMOMINU, AMOMAXU)
  amoOps.foreach(op => decoderService.add(op, List(MEMORY_LRSC -> False)))

  val lrscOps = List(LR, SC)
  lrscOps.foreach(op => decoderService.add(op, List(MEMORY_AMO -> False)))
}
def MANAGEMENT = M"-------00000-----101-----0001111"
decoderService.addDefault(MEMORY_MANAGMENT, False)
@ -123,26 +150,6 @@ class DBusCachedPlugin(config : DataCacheConfig,
if(pipeline.serviceExist(classOf[PrivilegeService]))
privilegeService = pipeline.service(classOf[PrivilegeService])
// if(pipeline.serviceExist(classOf[ReportService])){
// val report = pipeline.service(classOf[ReportService])
// report.add("dBus" -> {
// val e = new BusReport()
// val c = new CacheReport()
// e.kind = "cached"
// e.flushInstructions.add(0x13 | (1 << 7)) ////ADDI x1, x0, 0
// for(idx <- 0 until cacheSize by bytePerLine){
// e.flushInstructions.add(0x7000500F + (1 << 15)) //Clean invalid data cache way x1
// e.flushInstructions.add(0x13 + (1 << 7) + (1 << 15) + (bytePerLine << 20)) //ADDI x1, x1, 32
// }
//
// e.info = c
// c.size = cacheSize
// c.bytePerLine = bytePerLine
//
// e
// })
// }
}
override def build(pipeline: VexRiscv): Unit = {
@ -182,16 +189,23 @@ class DBusCachedPlugin(config : DataCacheConfig,
cache.io.cpu.execute.args.size := size
cache.io.cpu.execute.args.forceUncachedAccess := False
cache.io.cpu.flush.valid := arbitration.isValid && input(MEMORY_MANAGMENT)
arbitration.haltItself setWhen(cache.io.cpu.flush.isStall)
if(withLrSc) {
cache.io.cpu.execute.args.isAtomic := False
when(input(MEMORY_ATOMIC)){
cache.io.cpu.execute.args.isAtomic := True
cache.io.cpu.execute.args.isLrsc := False
when(input(MEMORY_LRSC)){
cache.io.cpu.execute.args.isLrsc := True
}
}
// Forward the AMO request to the cache through the execute argument bundle,
// using the same explicit `.args` path as size, forceUncachedAccess and isLrsc.
if(withAmo){
  cache.io.cpu.execute.args.isAmo := input(MEMORY_AMO)
  // The operation is selected by instruction bits 31..29 plus bit 27 (swap).
  cache.io.cpu.execute.args.amoCtrl.alu := input(INSTRUCTION)(31 downto 29)
  cache.io.cpu.execute.args.amoCtrl.swap := input(INSTRUCTION)(27)
}
insert(MEMORY_ADDRESS_LOW) := cache.io.cpu.execute.address(1 downto 0)
when(cache.io.cpu.redo && arbitration.isValid && input(MEMORY_ENABLE)){
@ -215,7 +229,7 @@ class DBusCachedPlugin(config : DataCacheConfig,
cache.io.cpu.writeBack.isStuck := arbitration.isStuck
cache.io.cpu.writeBack.isUser := (if(privilegeService != null) privilegeService.isUser() else False)
cache.io.cpu.writeBack.address := U(input(REGFILE_WRITE_DATA))
if(withLrSc) cache.io.cpu.writeBack.clearAtomicEntries := service(classOf[IContextSwitching]).isContextSwitching
if(withLrSc) cache.io.cpu.writeBack.clearLrsc := service(classOf[IContextSwitching]).isContextSwitching
redoBranch.valid := False
redoBranch.payload := input(PC)
@ -285,7 +299,8 @@ class DBusCachedPlugin(config : DataCacheConfig,
cache.io.cpu.execute.args.data := dBusAccess.cmd.data
cache.io.cpu.execute.args.size := dBusAccess.cmd.size
cache.io.cpu.execute.args.forceUncachedAccess := False
if(withLrSc) cache.io.cpu.execute.args.isAtomic := False
if(withLrSc) cache.io.cpu.execute.args.isLrsc := False
if(withAmo) cache.io.cpu.execute.args.isAmo := False
cache.io.cpu.execute.address := dBusAccess.cmd.address //Will only be 12 muxes
forceDatapath := True
}

View File

@ -234,6 +234,7 @@ public:
int32_t pc, lastPc;
uint32_t lastInstruction;
int32_t regs[32];
uint64_t stepCounter;
uint32_t mscratch, sscratch;
uint32_t misa;
@ -400,6 +401,7 @@ public:
satp.mode = 0;
ipSoft = 0;
ipInput = 0;
stepCounter = 0;
}
virtual void rfWrite(int32_t address, int32_t data) {
@ -422,7 +424,7 @@ public:
virtual bool dRead(int32_t address, int32_t size, uint32_t *data) = 0;
virtual void dWrite(int32_t address, int32_t size, uint32_t data) = 0;
enum AccessKind {READ,WRITE,EXECUTE};
enum AccessKind {READ,WRITE,EXECUTE,READ_WRITE};
virtual bool isMmuRegion(uint32_t v) = 0;
bool v2p(uint32_t v, uint32_t *p, AccessKind kind){
uint32_t effectivePrivilege = status.mprv && kind != EXECUTE ? status.mpp : privilege;
@ -441,11 +443,10 @@ public:
if(!tlb.u && effectivePrivilege == 0) return true;
if( tlb.u && effectivePrivilege == 1 && !status.sum) return true;
if(superPage && tlb.ppn0 != 0) return true;
switch(kind){
case READ: if(!tlb.r && !(status.mxr && tlb.x)) return true; break;
case WRITE: if(!tlb.w) return true; break;
case EXECUTE: if(!tlb.x) return true; break;
}
// Per-access-kind permission checks; returning true reports a translation fault to the caller.
// READ_WRITE goes through both the read check and the write check.
// Read is allowed by tlb.r, or by tlb.x when status.mxr is set.
if(kind == READ || kind == READ_WRITE) if(!tlb.r && !(status.mxr && tlb.x)) return true;
if(kind == WRITE || kind == READ_WRITE) if(!tlb.w) return true;
if(kind == EXECUTE) if(!tlb.x) return true;
*p = (tlb.ppn1 << 22) | (superPage ? v & 0x3FF000 : tlb.ppn0 << 12) | (v & 0xFFF);
}
return false;
@ -654,6 +655,7 @@ public:
virtual void step() {
stepCounter++;
livenessStep = 0;
#define rd32 ((i >> 7) & 0x1F)
#define iBits(lo, len) ((i >> lo) & ((1 << len)-1))
@ -907,7 +909,39 @@ public:
pcWrite(pc + 4);
}
} break;
default: ilegalInstruction(); break;
default: {
    // Atomic memory operations of the reference model; without -DAMO they are illegal.
#ifndef AMO
    ilegalInstruction();
#else
    // Instruction bits 31..27 select the operation.
    uint32_t sel = (i >> 27) & 0x1F;
    uint32_t addr = i32_rs1;
    int32_t src = i32_rs2;
    int32_t readValue;
    uint32_t pAddr;
    // An AMO both reads and writes memory, so the translation is checked as READ_WRITE.
    if(v2p(addr, &pAddr, READ_WRITE)){ trap(0, 15, addr); return; }
    // NOTE(review): a failed read reports the same cause (15) as a failed translation - confirm this is intended.
    if(dRead(pAddr, 4, (uint32_t*)&readValue)){
        trap(0, 15, addr);
        return;
    }
    int writeValue;
    switch(sel){
        case 0x0:  writeValue = src + readValue; break;
        case 0x1:  writeValue = src; break;
        case 0x4:  writeValue = src ^ readValue; break;
        case 0xC:  writeValue = src & readValue; break;
        case 0x8:  writeValue = src | readValue; break;
        case 0x10: writeValue = min(src, readValue); break;
        case 0x14: writeValue = max(src, readValue); break;
        case 0x18: writeValue = min((unsigned int)src, (unsigned int)readValue); break;
        case 0x1C: writeValue = max((unsigned int)src, (unsigned int)readValue); break;
        default: ilegalInstruction(); return; break;
    }
    // Memory receives the combined value; rd receives the value that was read.
    dWrite(pAddr, 4, writeValue);
    rfWrite(rd32, readValue);
    pcWrite(pc + 4);
#endif
} break;
}
break;
default: ilegalInstruction(); break;
@ -1374,6 +1408,8 @@ public:
if(i >= TRACE_START) tfp->dump(i);
#endif
}
uint64_t privilegeCounters[4] = {0,0,0,0};
Workspace* run(uint64_t timeout = 5000){
// cout << "Start " << name << endl;
if(timeout == 0) timeout = 0x7FFFFFFFFFFFFFFF;
@ -1513,6 +1549,13 @@ public:
#endif
if(top->VexRiscv->writeBack_arbitration_isFiring){
if(riscvRefEnable) {
// privilegeCounters[riscvRef.privilege]++;
// if((riscvRef.stepCounter & 0xFFFFF) == 0){
// cout << "privilege report" << endl;
// cout << "- U " << privilegeCounters[0] << endl;
// cout << "- S " << privilegeCounters[1] << endl;
// cout << "- M " << privilegeCounters[3] << endl;
// }
riscvRef.step();
bool mIntTimer = false;
bool mIntExt = false;
@ -3456,7 +3499,8 @@ int main(int argc, char **argv, char **env) {
->setDStall(true)
->bootAt(0x80000000)
->run(0);
// ->run(1173000000l );
// ->run((496300000l + 2000000) / 2);
// ->run(438700000l/2);
#endif
// #ifdef MMU
@ -3474,6 +3518,7 @@ int main(int argc, char **argv, char **env) {
#ifdef RUN_HEX
//w.loadHex("/home/spinalvm/hdl/zephyr/zephyrSpinalHdl/samples/synchronization/build/zephyr/zephyr.hex");
w.loadHex(RUN_HEX);
w.withRiscvRef();
#endif
w.noInstructionReadCheck();
//w.setIStall(false);
@ -3611,18 +3656,22 @@ int main(int argc, char **argv, char **env) {
Dhrystone("dhrystoneO3MC_Stall","dhrystoneO3MC",true,true).run(1.9e6);
#endif
#endif
Dhrystone("dhrystoneO3","dhrystoneO3",false,false).run(1.9e6);
#if defined(COMPRESSED)
Dhrystone("dhrystoneO3C","dhrystoneO3C",false,false).run(1.9e6);
#endif
Dhrystone("dhrystoneO3","dhrystoneO3",false,false).run(1.9e6);
#if defined(MUL) && defined(DIV)
Dhrystone("dhrystoneO3M","dhrystoneO3M",false,false).run(1.9e6);
#if defined(COMPRESSED)
Dhrystone("dhrystoneO3MC","dhrystoneO3MC",false,false).run(1.9e6);
#endif
Dhrystone("dhrystoneO3M","dhrystoneO3M",false,false).run(1.9e6);
#endif
#endif
#ifdef COREMARK
Dhrystone("coremark","/home/miaou/pro/riscv/coremark/coremark",false,false).run(1.9e6);
#endif
#ifdef FREERTOS
#ifdef SEED

View File

@ -15,6 +15,7 @@ FENCEI?=no
MMU?=yes
SEED?=no
ATOMIC?=no
AMO?=no
NO_STALL?=no
DEBUG_PLUGIN?=STD
DEBUG_PLUGIN_EXTERNAL?=no
@ -32,7 +33,7 @@ MTIME_INSTR_FACTOR?=no
COMPRESSED?=no
SUPERVISOR?=no
STOP_ON_ERROR?=no
# Conditional default, like the other options above, so the environment can override it too.
COREMARK?=no
ADDCFLAGS += -CFLAGS -DIBUS_${IBUS}
ADDCFLAGS += -CFLAGS -DDBUS_${DBUS}
@ -42,7 +43,7 @@ ADDCFLAGS += -CFLAGS -pthread
ADDCFLAGS += -CFLAGS -DTHREAD_COUNT=${THREAD_COUNT}
ifeq ($(DEBUG),yes)
ADDCFLAGS += -CFLAGS -O0 -CFLAGS -g
ADDCFLAGS += -CFLAGS -Og -CFLAGS -g
else
ADDCFLAGS += -CFLAGS -O3 -O3
endif
@ -70,6 +71,11 @@ ifeq ($(FLOW_INFO),yes)
endif
ifeq ($(COREMARK),yes)
ADDCFLAGS += -CFLAGS -DCOREMARK
endif
ifneq ($(shell grep timerInterrupt ../../../../VexRiscv.v -w),)
ADDCFLAGS += -CFLAGS -DTIMER_INTERRUPT
@ -141,6 +147,10 @@ ifeq ($(ATOMIC),yes)
ADDCFLAGS += -CFLAGS -DATOMIC
endif
ifeq ($(AMO),yes)
ADDCFLAGS += -CFLAGS -DAMO
endif
ifeq ($(CUSTOM_SIMD_ADD),yes)
ADDCFLAGS += -CFLAGS -DCUSTOM_SIMD_ADD
endif