fpu fix exception flag handling

This commit is contained in:
Dolu1990 2021-02-19 13:03:48 +01:00
parent e504afbf18
commit 3f226b758c
5 changed files with 94 additions and 63 deletions

View File

@ -125,6 +125,8 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val scrap = Bool() val scrap = Bool()
val roundMode = FpuRoundMode() val roundMode = FpuRoundMode()
val format = p.withDouble generate FpuFormat() val format = p.withDouble generate FpuFormat()
val NV = Bool()
val DZ = Bool() //TODO
} }
case class RoundOutput() extends Bundle{ case class RoundOutput() extends Bundle{
@ -133,6 +135,8 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val rd = p.rfAddress() val rd = p.rfAddress()
val value = p.internalFloating() val value = p.internalFloating()
val format = p.withDouble generate FpuFormat() val format = p.withDouble generate FpuFormat()
val NV, NX, OF, UF, DZ = Bool()
val write = Bool()
} }
val rf = new Area{ val rf = new Area{
@ -153,20 +157,20 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val lockFreeId = OHMasking.first(lock.map(!_.valid)) val lockFreeId = OHMasking.first(lock.map(!_.valid))
} }
val completion = for(source <- 0 until portCount) yield new Area{ // val completion = for(source <- 0 until portCount) yield new Area{
def port = io.port(source) // def port = io.port(source)
port.completion.flag.NV := False // port.completion.flag.NV := False
port.completion.flag.DZ := False // port.completion.flag.DZ := False
port.completion.flag.OF := False // port.completion.flag.OF := False
port.completion.flag.UF := False // port.completion.flag.UF := False
port.completion.flag.NX := False // port.completion.flag.NX := False
//
val increments = ArrayBuffer[Bool]() // val increments = ArrayBuffer[Bool]()
//
afterElaboration{ // afterElaboration{
port.completion.count := increments.map(_.asUInt.resize(log2Up(increments.size + 1))).reduceBalancedTree(_ + _) // port.completion.count := increments.map(_.asUInt.resize(log2Up(increments.size + 1))).reduceBalancedTree(_ + _)
} // }
} // }
val commitFork = new Area{ val commitFork = new Area{
val load, commit = Vec(Stream(FpuCommit(p)), portCount) val load, commit = Vec(Stream(FpuCommit(p)), portCount)
@ -522,6 +526,8 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
output.value.mantissa := recoded.mantissa @@ U"0" output.value.mantissa := recoded.mantissa @@ U"0"
output.value.special := recoded.special output.value.special := recoded.special
output.scrap := False output.scrap := False
output.NV := False
output.DZ := False
when(input.i2f){ when(input.i2f){
output.value.sign := i2fSign output.value.sign := i2fSign
output.value.exponent := (U(exponentOne+31) - fsm.shift.by).resized output.value.exponent := (U(exponentOne+31) - fsm.shift.by).resized
@ -534,6 +540,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
output.value.mantissa := U(i2fHigh) @@ (if(p.withDouble) U"0" else U"") output.value.mantissa := U(i2fHigh) @@ (if(p.withDouble) U"0" else U"")
} }
} }
} }
val shortPip = new Area{ val shortPip = new Area{
@ -543,8 +550,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val result = p.storeLoadType().assignDontCare() val result = p.storeLoadType().assignDontCare()
val flag = io.port(input.source).completion.flag
val halt = False val halt = False
val recodedResult = p.storeLoadType() val recodedResult = p.storeLoadType()
val f32 = new Area{ val f32 = new Area{
@ -677,7 +682,8 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
} }
} }
val rspNv = False
val rspNx = False
val f2i = new Area{ //Will not work for 64 bits float max value rounding val f2i = new Area{ //Will not work for 64 bits float max value rounding
val unsigned = fsm.shift.output(32 downto 0) >> 1 val unsigned = fsm.shift.output(32 downto 0) >> 1
@ -703,9 +709,9 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val low = overflow val low = overflow
val high = input.arg(0) ^ overflow val high = input.arg(0) ^ overflow
result := (31 -> high, default -> low) result := (31 -> high, default -> low)
flag.NV := input.valid && input.opcode === FpuOpcode.F2I && fsm.done && !isZero rspNv := input.valid && input.opcode === FpuOpcode.F2I && fsm.done && !isZero
} otherwise { } otherwise {
flag.NX := input.valid && input.opcode === FpuOpcode.F2I && fsm.done && round =/= 0 rspNx := input.valid && input.opcode === FpuOpcode.F2I && fsm.done && round =/= 0
} }
} }
@ -805,9 +811,9 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val rs2Nan = input.rs2.isNan val rs2Nan = input.rs2.isNan
val rs1NanNv = input.rs1.isNan && (!input.rs1.isQuiet || signalQuiet) val rs1NanNv = input.rs1.isNan && (!input.rs1.isQuiet || signalQuiet)
val rs2NanNv = input.rs2.isNan && (!input.rs2.isQuiet || signalQuiet) val rs2NanNv = input.rs2.isNan && (!input.rs2.isQuiet || signalQuiet)
val nv = List(FpuOpcode.CMP, FpuOpcode.MIN_MAX, FpuOpcode.FCVT_X_X).map(input.opcode === _).orR && rs1NanNv || val NV = List(FpuOpcode.CMP, FpuOpcode.MIN_MAX, FpuOpcode.FCVT_X_X).map(input.opcode === _).orR && rs1NanNv ||
List(FpuOpcode.CMP, FpuOpcode.MIN_MAX).map(input.opcode === _).orR && rs2NanNv List(FpuOpcode.CMP, FpuOpcode.MIN_MAX).map(input.opcode === _).orR && rs2NanNv
flag.NV setWhen(input.valid && nv) rspNv setWhen(NV)
val rspStreams = Vec(Stream(FpuRsp(p)), portCount) val rspStreams = Vec(Stream(FpuRsp(p)), portCount)
input.ready := !halt && (toFpuRf ? rfOutput.ready | rspStreams.map(_.ready).read(input.source)) input.ready := !halt && (toFpuRf ? rfOutput.ready | rspStreams.map(_.ready).read(input.source))
@ -815,9 +821,14 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
def rsp = rspStreams(i) def rsp = rspStreams(i)
rsp.valid := input.valid && input.source === i && !toFpuRf && !halt rsp.valid := input.valid && input.source === i && !toFpuRf && !halt
rsp.value := result rsp.value := result
rsp.NV := rspNv
rsp.NX := rspNx
io.port(i).rsp << rsp.stage() io.port(i).rsp << rsp.stage()
completion(i).increments += (RegNext(rsp.fire) init(False))
} }
rfOutput.NV := NV
rfOutput.DZ := False
} }
val mul = p.withMul generate new Area{ val mul = p.withMul generate new Area{
@ -891,13 +902,14 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
output.exponent := (exp - exponentOne).resized output.exponent := (exp - exponentOne).resized
output.mantissa := man.asUInt output.mantissa := man.asUInt
output.setNormal output.setNormal
val NV = False
when(exp(exp.getWidth-3, 3 bits) >= 5) { output.exponent(p.internalExponentSize-2, 2 bits) := 3 } when(exp(exp.getWidth-3, 3 bits) >= 5) { output.exponent(p.internalExponentSize-2, 2 bits) := 3 }
val flag = io.port(input.source).completion.flag // val flag = io.port(input.source).completion.flag
when(forceNan) { when(forceNan) {
output.setNanQuiet output.setNanQuiet
flag.NV setWhen(input.valid && (infinitynan || input.rs1.isNanSignaling || input.rs2.isNanSignaling)) NV setWhen(input.valid && (infinitynan || input.rs1.isNanSignaling || input.rs2.isNanSignaling))
} elsewhen(forceOverflow) { } elsewhen(forceOverflow) {
output.setInfinity output.setInfinity
} elsewhen(forceZero) { } elsewhen(forceZero) {
@ -909,6 +921,8 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val result = new Area { val result = new Area {
def input = norm.input def input = norm.input
def NV = norm.NV
val notMul = new Area { val notMul = new Area {
val output = Flow(UInt(p.internalMantissaSize + 1 bits)) val output = Flow(UInt(p.internalMantissaSize + 1 bits))
output.valid := input.valid && input.divSqrt output.valid := input.valid && input.divSqrt
@ -924,6 +938,8 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
output.roundMode := input.roundMode output.roundMode := input.roundMode
output.scrap := norm.scrap output.scrap := norm.scrap
output.value := norm.output output.value := norm.output
output.NV := NV
output.DZ := False
decode.mulToAdd.valid := input.valid && input.add decode.mulToAdd.valid := input.valid && input.add
decode.mulToAdd.source := input.source decode.mulToAdd.source := input.source
@ -1245,8 +1261,9 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
output.scrap := (mantissa(1) | mantissa(0) | roundingScrap) output.scrap := (mantissa(1) | mantissa(0) | roundingScrap)
val flag = io.port(input.source).completion.flag // val flag = io.port(input.source).completion.flag
flag.NV setWhen (input.valid && (infinityNan || input.rs1.isNanSignaling || input.rs2.isNanSignaling)) output.NV := (input.valid && (infinityNan || input.rs1.isNanSignaling || input.rs2.isNanSignaling))
output.DZ := False
when(forceNan) { when(forceNan) {
output.value.setNanQuiet output.value.setNanQuiet
} elsewhen (forceZero) { } elsewhen (forceZero) {
@ -1272,8 +1289,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
if(p.withMul) (inputs += mul.result.output) if(p.withMul) (inputs += mul.result.output)
if(p.withShortPipMisc) (inputs += shortPip.rfOutput.pipelined(m2s = true)) if(p.withShortPipMisc) (inputs += shortPip.rfOutput.pipelined(m2s = true))
val arbitrated = StreamArbiterFactory.lowerFirst.noLock.on(inputs) val arbitrated = StreamArbiterFactory.lowerFirst.noLock.on(inputs)
val isCommited = rf.lock.map(_.commited).read(arbitrated.lockId)
val commited = arbitrated.haltWhen(!isCommited).toFlow
} }
class RoundFront extends MergeInput{ class RoundFront extends MergeInput{
@ -1283,7 +1298,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
} }
val roundFront = new Area { val roundFront = new Area {
val input = merge.commited.stage() val input = merge.arbitrated.stage()
val output = input.swapPayload(new RoundFront()) val output = input.swapPayload(new RoundFront())
output.payload.assignSomeByName(input.payload) output.payload.assignSomeByName(input.payload)
@ -1313,7 +1328,8 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val roundBack = new Area{ val roundBack = new Area{
val input = roundFront.output.stage() val input = roundFront.output.stage()
val output = input.swapPayload(RoundOutput()) val isCommited = rf.lock.map(_.commited).read(input.lockId)
val output = input.haltWhen(!isCommited).toFlow.swapPayload(RoundOutput())
import input.payload._ import input.payload._
val math = p.internalFloating() val math = p.internalFloating()
@ -1375,15 +1391,16 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
nx setWhen(!input.value.special && (roundAdjusted =/= 0)) nx setWhen(!input.value.special && (roundAdjusted =/= 0))
when(input.valid){ val write = rf.lock.map(_.write).read(input.lockId)
val flag = io.port(input.source).completion.flag output.NX := nx & write
flag.NX setWhen(nx) output.OF := of & write
flag.OF setWhen(of) output.UF := uf & write
flag.UF setWhen(uf) output.NV := input.NV & write
} output.DZ := input.DZ & write
output.source := input.source output.source := input.source
output.lockId := input.lockId output.lockId := input.lockId
output.rd := input.rd output.rd := input.rd
output.write := write
if(p.withDouble) output.format := input.format if(p.withDouble) output.format := input.format
output.value := patched output.value := patched
} }
@ -1392,7 +1409,14 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val input = roundBack.output.stage() val input = roundBack.output.stage()
for(i <- 0 until portCount){ for(i <- 0 until portCount){
completion(i).increments += (RegNext(input.fire && input.source === i) init(False)) val c = io.port(i).completion
c.valid := input.fire && input.source === i
c.flags.NX := input.NX
c.flags.OF := input.OF
c.flags.UF := input.UF
c.flags.NV := input.NV
c.flags.DZ := input.DZ
c.written := input.write
} }
when(input.valid){ when(input.valid){
@ -1402,7 +1426,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
} }
val port = rf.ram.writePort val port = rf.ram.writePort
port.valid := input.valid && rf.lock.map(_.write).read(input.lockId) port.valid := input.valid && input.write
port.address := input.source @@ input.rd port.address := input.source @@ input.rd
port.data.value := input.value port.data.value := input.value
if(p.withDouble) port.data.boxed := input.format === FpuFormat.FLOAT if(p.withDouble) port.data.boxed := input.format === FpuFormat.FLOAT

View File

@ -136,14 +136,8 @@ case class FpuFlags() extends Bundle{
} }
case class FpuCompletion() extends Bundle{ case class FpuCompletion() extends Bundle{
val flag = FpuFlags() val flags = FpuFlags()
val count = UInt(2 bits) val written = Bool() //Used for verification purposes
def stage() = {
val ret = FpuCompletion().setCompositeName(this, "stage", true)
ret := this
ret
}
} }
case class FpuCmd(p : FpuParameter) extends Bundle{ case class FpuCmd(p : FpuParameter) extends Bundle{
@ -163,13 +157,14 @@ case class FpuCommit(p : FpuParameter) extends Bundle{
case class FpuRsp(p : FpuParameter) extends Bundle{ case class FpuRsp(p : FpuParameter) extends Bundle{
val value = p.storeLoadType() // IEEE754 store || Integer val value = p.storeLoadType() // IEEE754 store || Integer
val NV, NX = Bool()
} }
case class FpuPort(p : FpuParameter) extends Bundle with IMasterSlave { case class FpuPort(p : FpuParameter) extends Bundle with IMasterSlave {
val cmd = Stream(FpuCmd(p)) val cmd = Stream(FpuCmd(p))
val commit = Stream(FpuCommit(p)) val commit = Stream(FpuCommit(p))
val rsp = Stream(FpuRsp(p)) val rsp = Stream(FpuRsp(p))
val completion = FpuCompletion() val completion = Flow(FpuCompletion())
override def asMaster(): Unit = { override def asMaster(): Unit = {
master(cmd, commit) master(cmd, commit)

View File

@ -140,7 +140,7 @@ class FpuPlugin(externalFpu : Boolean = false,
} }
//TODO FMV_X_X + doubles //TODO FMV_X_X + doubles
port = FpuPort(p) port = FpuPort(p).addTag(Verilator.public)
if(externalFpu) master(port) if(externalFpu) master(port)
val dBusEncoding = pipeline.service(classOf[DBusEncodingService]) val dBusEncoding = pipeline.service(classOf[DBusEncodingService])
@ -168,16 +168,16 @@ class FpuPlugin(externalFpu : Boolean = false,
val csr = pipeline plug new Area{ val csr = pipeline plug new Area{
val pendings = Reg(UInt(5 bits)) init(0) val pendings = Reg(UInt(5 bits)) init(0)
pendings := pendings + U(port.cmd.fire) - port.completion.count pendings := pendings + U(port.cmd.fire) - U(port.completion.fire) - U(port.rsp.fire)
val hasPending = pendings =/= 0 val hasPending = pendings =/= 0
val flags = Reg(FpuFlags()) val flags = Reg(FpuFlags())
flags.NV init(False) setWhen(port.completion.flag.NV) flags.NV init(False) setWhen(port.completion.fire && port.completion.flags.NV)
flags.DZ init(False) setWhen(port.completion.flag.DZ) flags.DZ init(False) setWhen(port.completion.fire && port.completion.flags.DZ)
flags.OF init(False) setWhen(port.completion.flag.OF) flags.OF init(False) setWhen(port.completion.fire && port.completion.flags.OF)
flags.UF init(False) setWhen(port.completion.flag.UF) flags.UF init(False) setWhen(port.completion.fire && port.completion.flags.UF)
flags.NX init(False) setWhen(port.completion.flag.NX) flags.NX init(False) setWhen(port.completion.fire && port.completion.flags.NX)
val service = pipeline.service(classOf[CsrInterface]) val service = pipeline.service(classOf[CsrInterface])
val rm = Reg(Bits(3 bits)) init(0) val rm = Reg(Bits(3 bits)) init(0)
@ -244,6 +244,10 @@ class FpuPlugin(externalFpu : Boolean = false,
when(arbitration.isValid) { when(arbitration.isValid) {
dBusEncoding.bypassStore(storeFormated) dBusEncoding.bypassStore(storeFormated)
output(REGFILE_WRITE_DATA) := port.rsp.value(31 downto 0) output(REGFILE_WRITE_DATA) := port.rsp.value(31 downto 0)
when(!arbitration.isStuck && !arbitration.isRemoved){
csr.flags.NV setWhen(port.rsp.NV)
csr.flags.NX setWhen(port.rsp.NX)
}
} }
when(!port.rsp.valid){ when(!port.rsp.valid){
arbitration.haltByOther := True arbitration.haltByOther := True

View File

@ -239,6 +239,9 @@ class success : public std::exception { };
#define MSTATUS_READ_MASK 0x1888 #define MSTATUS_READ_MASK 0x1888
#endif #endif
// Short fixed-width integer aliases used by the testbench.
// The 64-bit alias is named u64: defining u32 a second time as uint64_t
// would be an incompatible macro redefinition and make u32 64 bits wide.
#define u32 uint32_t
#define u64 uint64_t
class RiscvGolden { class RiscvGolden {
public: public:
@ -4043,26 +4046,26 @@ int main(int argc, char **argv, char **env) {
#endif #endif
for(const string &name : riscvTestMain){ for(const string &name : riscvTestMain){
redo(REDO,RiscvTest(name).run();) redo(REDO,RiscvTest(name).withRiscvRef()->run();)
} }
for(const string &name : riscvTestMemory){ for(const string &name : riscvTestMemory){
redo(REDO,RiscvTest(name).run();) redo(REDO,RiscvTest(name).withRiscvRef()->run();)
} }
#ifdef MUL #ifdef MUL
for(const string &name : riscvTestMul){ for(const string &name : riscvTestMul){
redo(REDO,RiscvTest(name).run();) redo(REDO,RiscvTest(name).withRiscvRef()->run();)
} }
#endif #endif
#ifdef DIV #ifdef DIV
for(const string &name : riscvTestDiv){ for(const string &name : riscvTestDiv){
redo(REDO,RiscvTest(name).run();) redo(REDO,RiscvTest(name).withRiscvRef()->run();)
} }
#endif #endif
#ifdef COMPRESSED #ifdef COMPRESSED
redo(REDO,RiscvTest("rv32uc-p-rvc").bootAt(0x800000FCu)->run()); redo(REDO,RiscvTest("rv32uc-p-rvc").withRiscvRef()->bootAt(0x800000FCu)->run());
#endif #endif
#if defined(CSR) && !defined(CSR_SKIP_TEST) #if defined(CSR) && !defined(CSR_SKIP_TEST)

View File

@ -55,13 +55,13 @@ class FpuTest extends FunSuite{
} }
def testP(p : FpuParameter){ def testP(p : FpuParameter){
val portCount = 1 val portCount = 4
val config = SimConfig val config = SimConfig
config.allOptimisation config.allOptimisation
// if(p.withDouble) config.withFstWave // if(p.withDouble) config.withFstWave
config.compile(new FpuCore(portCount, p){ config.compile(new FpuCore(portCount, p){
for(i <- 0 until portCount) out(Bits(5 bits)).setName(s"flagAcc$i") := io.port(i).completion.flag.asBits for(i <- 0 until portCount) out(Bits(5 bits)).setName(s"flagAcc$i") := io.port(i).completion.flags.asBits
setDefinitionName("FpuCore"+ (if(p.withDouble) "Double" else "")) setDefinitionName("FpuCore"+ (if(p.withDouble) "Double" else ""))
}).doSim(seed = 42){ dut => }).doSim(seed = 42){ dut =>
dut.clockDomain.forkStimulus(10) dut.clockDomain.forkStimulus(10)
@ -228,8 +228,10 @@ class FpuTest extends FunSuite{
val flagAggregated = dut.reflectBaseType(s"flagAcc$id").asInstanceOf[Bits] val flagAggregated = dut.reflectBaseType(s"flagAcc$id").asInstanceOf[Bits]
dut.clockDomain.onSamplings{ dut.clockDomain.onSamplings{
val c = dut.io.port(id).completion val c = dut.io.port(id).completion
pendingMiaou -= c.count.toInt if(c.valid.toBoolean) {
pendingMiaou -= 1
flagAccumulator |= flagAggregated.toInt flagAccumulator |= flagAggregated.toInt
}
dut.writeback.randomSim.randomize() dut.writeback.randomSim.randomize()
} }
@ -242,6 +244,9 @@ class FpuTest extends FunSuite{
StreamMonitor(dut.io.port(id)rsp, dut.clockDomain){payload => StreamMonitor(dut.io.port(id)rsp, dut.clockDomain){payload =>
pendingMiaou -= 1
if(payload.NV.toBoolean) flagAccumulator |= 1 << 4
if(payload.NX.toBoolean) flagAccumulator |= 1 << 0
rspQueue.dequeue().apply(payload) rspQueue.dequeue().apply(payload)
} }