fpu fix exception flag handeling

This commit is contained in:
Dolu1990 2021-02-19 13:03:48 +01:00
parent e504afbf18
commit 3f226b758c
5 changed files with 94 additions and 63 deletions

View File

@ -125,6 +125,8 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val scrap = Bool()
val roundMode = FpuRoundMode()
val format = p.withDouble generate FpuFormat()
val NV = Bool()
val DZ = Bool() //TODO
}
case class RoundOutput() extends Bundle{
@ -133,6 +135,8 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val rd = p.rfAddress()
val value = p.internalFloating()
val format = p.withDouble generate FpuFormat()
val NV, NX, OF, UF, DZ = Bool()
val write = Bool()
}
val rf = new Area{
@ -153,20 +157,20 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val lockFreeId = OHMasking.first(lock.map(!_.valid))
}
val completion = for(source <- 0 until portCount) yield new Area{
def port = io.port(source)
port.completion.flag.NV := False
port.completion.flag.DZ := False
port.completion.flag.OF := False
port.completion.flag.UF := False
port.completion.flag.NX := False
val increments = ArrayBuffer[Bool]()
afterElaboration{
port.completion.count := increments.map(_.asUInt.resize(log2Up(increments.size + 1))).reduceBalancedTree(_ + _)
}
}
// val completion = for(source <- 0 until portCount) yield new Area{
// def port = io.port(source)
// port.completion.flag.NV := False
// port.completion.flag.DZ := False
// port.completion.flag.OF := False
// port.completion.flag.UF := False
// port.completion.flag.NX := False
//
// val increments = ArrayBuffer[Bool]()
//
// afterElaboration{
// port.completion.count := increments.map(_.asUInt.resize(log2Up(increments.size + 1))).reduceBalancedTree(_ + _)
// }
// }
val commitFork = new Area{
val load, commit = Vec(Stream(FpuCommit(p)), portCount)
@ -522,6 +526,8 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
output.value.mantissa := recoded.mantissa @@ U"0"
output.value.special := recoded.special
output.scrap := False
output.NV := False
output.DZ := False
when(input.i2f){
output.value.sign := i2fSign
output.value.exponent := (U(exponentOne+31) - fsm.shift.by).resized
@ -534,6 +540,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
output.value.mantissa := U(i2fHigh) @@ (if(p.withDouble) U"0" else U"")
}
}
}
val shortPip = new Area{
@ -543,8 +550,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val result = p.storeLoadType().assignDontCare()
val flag = io.port(input.source).completion.flag
val halt = False
val recodedResult = p.storeLoadType()
val f32 = new Area{
@ -677,7 +682,8 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
}
}
val rspNv = False
val rspNx = False
val f2i = new Area{ //Will not work for 64 bits float max value rounding
val unsigned = fsm.shift.output(32 downto 0) >> 1
@ -703,9 +709,9 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val low = overflow
val high = input.arg(0) ^ overflow
result := (31 -> high, default -> low)
flag.NV := input.valid && input.opcode === FpuOpcode.F2I && fsm.done && !isZero
rspNv := input.valid && input.opcode === FpuOpcode.F2I && fsm.done && !isZero
} otherwise {
flag.NX := input.valid && input.opcode === FpuOpcode.F2I && fsm.done && round =/= 0
rspNx := input.valid && input.opcode === FpuOpcode.F2I && fsm.done && round =/= 0
}
}
@ -805,9 +811,9 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val rs2Nan = input.rs2.isNan
val rs1NanNv = input.rs1.isNan && (!input.rs1.isQuiet || signalQuiet)
val rs2NanNv = input.rs2.isNan && (!input.rs2.isQuiet || signalQuiet)
val nv = List(FpuOpcode.CMP, FpuOpcode.MIN_MAX, FpuOpcode.FCVT_X_X).map(input.opcode === _).orR && rs1NanNv ||
val NV = List(FpuOpcode.CMP, FpuOpcode.MIN_MAX, FpuOpcode.FCVT_X_X).map(input.opcode === _).orR && rs1NanNv ||
List(FpuOpcode.CMP, FpuOpcode.MIN_MAX).map(input.opcode === _).orR && rs2NanNv
flag.NV setWhen(input.valid && nv)
rspNv setWhen(NV)
val rspStreams = Vec(Stream(FpuRsp(p)), portCount)
input.ready := !halt && (toFpuRf ? rfOutput.ready | rspStreams.map(_.ready).read(input.source))
@ -815,9 +821,14 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
def rsp = rspStreams(i)
rsp.valid := input.valid && input.source === i && !toFpuRf && !halt
rsp.value := result
rsp.NV := rspNv
rsp.NX := rspNx
io.port(i).rsp << rsp.stage()
completion(i).increments += (RegNext(rsp.fire) init(False))
}
rfOutput.NV := NV
rfOutput.DZ := False
}
val mul = p.withMul generate new Area{
@ -891,13 +902,14 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
output.exponent := (exp - exponentOne).resized
output.mantissa := man.asUInt
output.setNormal
val NV = False
when(exp(exp.getWidth-3, 3 bits) >= 5) { output.exponent(p.internalExponentSize-2, 2 bits) := 3 }
val flag = io.port(input.source).completion.flag
// val flag = io.port(input.source).completion.flag
when(forceNan) {
output.setNanQuiet
flag.NV setWhen(input.valid && (infinitynan || input.rs1.isNanSignaling || input.rs2.isNanSignaling))
NV setWhen(input.valid && (infinitynan || input.rs1.isNanSignaling || input.rs2.isNanSignaling))
} elsewhen(forceOverflow) {
output.setInfinity
} elsewhen(forceZero) {
@ -909,6 +921,8 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val result = new Area {
def input = norm.input
def NV = norm.NV
val notMul = new Area {
val output = Flow(UInt(p.internalMantissaSize + 1 bits))
output.valid := input.valid && input.divSqrt
@ -924,6 +938,8 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
output.roundMode := input.roundMode
output.scrap := norm.scrap
output.value := norm.output
output.NV := NV
output.DZ := False
decode.mulToAdd.valid := input.valid && input.add
decode.mulToAdd.source := input.source
@ -1245,8 +1261,9 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
output.scrap := (mantissa(1) | mantissa(0) | roundingScrap)
val flag = io.port(input.source).completion.flag
flag.NV setWhen (input.valid && (infinityNan || input.rs1.isNanSignaling || input.rs2.isNanSignaling))
// val flag = io.port(input.source).completion.flag
output.NV := (input.valid && (infinityNan || input.rs1.isNanSignaling || input.rs2.isNanSignaling))
output.DZ := False
when(forceNan) {
output.value.setNanQuiet
} elsewhen (forceZero) {
@ -1272,8 +1289,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
if(p.withMul) (inputs += mul.result.output)
if(p.withShortPipMisc) (inputs += shortPip.rfOutput.pipelined(m2s = true))
val arbitrated = StreamArbiterFactory.lowerFirst.noLock.on(inputs)
val isCommited = rf.lock.map(_.commited).read(arbitrated.lockId)
val commited = arbitrated.haltWhen(!isCommited).toFlow
}
class RoundFront extends MergeInput{
@ -1283,7 +1298,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
}
val roundFront = new Area {
val input = merge.commited.stage()
val input = merge.arbitrated.stage()
val output = input.swapPayload(new RoundFront())
output.payload.assignSomeByName(input.payload)
@ -1313,7 +1328,8 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val roundBack = new Area{
val input = roundFront.output.stage()
val output = input.swapPayload(RoundOutput())
val isCommited = rf.lock.map(_.commited).read(input.lockId)
val output = input.haltWhen(!isCommited).toFlow.swapPayload(RoundOutput())
import input.payload._
val math = p.internalFloating()
@ -1375,15 +1391,16 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
nx setWhen(!input.value.special && (roundAdjusted =/= 0))
when(input.valid){
val flag = io.port(input.source).completion.flag
flag.NX setWhen(nx)
flag.OF setWhen(of)
flag.UF setWhen(uf)
}
val write = rf.lock.map(_.write).read(input.lockId)
output.NX := nx & write
output.OF := of & write
output.UF := uf & write
output.NV := input.NV & write
output.DZ := input.DZ & write
output.source := input.source
output.lockId := input.lockId
output.rd := input.rd
output.write := write
if(p.withDouble) output.format := input.format
output.value := patched
}
@ -1392,7 +1409,14 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val input = roundBack.output.stage()
for(i <- 0 until portCount){
completion(i).increments += (RegNext(input.fire && input.source === i) init(False))
val c = io.port(i).completion
c.valid := input.fire && input.source === i
c.flags.NX := input.NX
c.flags.OF := input.OF
c.flags.UF := input.UF
c.flags.NV := input.NV
c.flags.DZ := input.DZ
c.written := input.write
}
when(input.valid){
@ -1402,7 +1426,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
}
val port = rf.ram.writePort
port.valid := input.valid && rf.lock.map(_.write).read(input.lockId)
port.valid := input.valid && input.write
port.address := input.source @@ input.rd
port.data.value := input.value
if(p.withDouble) port.data.boxed := input.format === FpuFormat.FLOAT

View File

@ -136,14 +136,8 @@ case class FpuFlags() extends Bundle{
}
case class FpuCompletion() extends Bundle{
val flag = FpuFlags()
val count = UInt(2 bits)
def stage() = {
val ret = FpuCompletion().setCompositeName(this, "stage", true)
ret := this
ret
}
val flags = FpuFlags()
val written = Bool() //Used for verification purposes
}
case class FpuCmd(p : FpuParameter) extends Bundle{
@ -163,13 +157,14 @@ case class FpuCommit(p : FpuParameter) extends Bundle{
case class FpuRsp(p : FpuParameter) extends Bundle{
val value = p.storeLoadType() // IEEE754 store || Integer
val NV, NX = Bool()
}
case class FpuPort(p : FpuParameter) extends Bundle with IMasterSlave {
val cmd = Stream(FpuCmd(p))
val commit = Stream(FpuCommit(p))
val rsp = Stream(FpuRsp(p))
val completion = FpuCompletion()
val completion = Flow(FpuCompletion())
override def asMaster(): Unit = {
master(cmd, commit)

View File

@ -140,7 +140,7 @@ class FpuPlugin(externalFpu : Boolean = false,
}
//TODO FMV_X_X + doubles
port = FpuPort(p)
port = FpuPort(p).addTag(Verilator.public)
if(externalFpu) master(port)
val dBusEncoding = pipeline.service(classOf[DBusEncodingService])
@ -168,16 +168,16 @@ class FpuPlugin(externalFpu : Boolean = false,
val csr = pipeline plug new Area{
val pendings = Reg(UInt(5 bits)) init(0)
pendings := pendings + U(port.cmd.fire) - port.completion.count
pendings := pendings + U(port.cmd.fire) - U(port.completion.fire) - U(port.rsp.fire)
val hasPending = pendings =/= 0
val flags = Reg(FpuFlags())
flags.NV init(False) setWhen(port.completion.flag.NV)
flags.DZ init(False) setWhen(port.completion.flag.DZ)
flags.OF init(False) setWhen(port.completion.flag.OF)
flags.UF init(False) setWhen(port.completion.flag.UF)
flags.NX init(False) setWhen(port.completion.flag.NX)
flags.NV init(False) setWhen(port.completion.fire && port.completion.flags.NV)
flags.DZ init(False) setWhen(port.completion.fire && port.completion.flags.DZ)
flags.OF init(False) setWhen(port.completion.fire && port.completion.flags.OF)
flags.UF init(False) setWhen(port.completion.fire && port.completion.flags.UF)
flags.NX init(False) setWhen(port.completion.fire && port.completion.flags.NX)
val service = pipeline.service(classOf[CsrInterface])
val rm = Reg(Bits(3 bits)) init(0)
@ -244,6 +244,10 @@ class FpuPlugin(externalFpu : Boolean = false,
when(arbitration.isValid) {
dBusEncoding.bypassStore(storeFormated)
output(REGFILE_WRITE_DATA) := port.rsp.value(31 downto 0)
when(!arbitration.isStuck && !arbitration.isRemoved){
csr.flags.NV setWhen(port.rsp.NV)
csr.flags.NX setWhen(port.rsp.NX)
}
}
when(!port.rsp.valid){
arbitration.haltByOther := True

View File

@ -239,6 +239,9 @@ class success : public std::exception { };
#define MSTATUS_READ_MASK 0x1888
#endif
#define u32 uint32_t
#define u32 uint64_t
class RiscvGolden {
public:
@ -4043,26 +4046,26 @@ int main(int argc, char **argv, char **env) {
#endif
for(const string &name : riscvTestMain){
redo(REDO,RiscvTest(name).run();)
redo(REDO,RiscvTest(name).withRiscvRef()->run();)
}
for(const string &name : riscvTestMemory){
redo(REDO,RiscvTest(name).run();)
redo(REDO,RiscvTest(name).withRiscvRef()->run();)
}
#ifdef MUL
for(const string &name : riscvTestMul){
redo(REDO,RiscvTest(name).run();)
redo(REDO,RiscvTest(name).withRiscvRef()->run();)
}
#endif
#ifdef DIV
for(const string &name : riscvTestDiv){
redo(REDO,RiscvTest(name).run();)
redo(REDO,RiscvTest(name).withRiscvRef()->run();)
}
#endif
#ifdef COMPRESSED
redo(REDO,RiscvTest("rv32uc-p-rvc").bootAt(0x800000FCu)->run());
redo(REDO,RiscvTest("rv32uc-p-rvc").withRiscvRef()->bootAt(0x800000FCu)->run());
#endif
#if defined(CSR) && !defined(CSR_SKIP_TEST)

View File

@ -55,13 +55,13 @@ class FpuTest extends FunSuite{
}
def testP(p : FpuParameter){
val portCount = 1
val portCount = 4
val config = SimConfig
config.allOptimisation
// if(p.withDouble) config.withFstWave
config.compile(new FpuCore(portCount, p){
for(i <- 0 until portCount) out(Bits(5 bits)).setName(s"flagAcc$i") := io.port(i).completion.flag.asBits
for(i <- 0 until portCount) out(Bits(5 bits)).setName(s"flagAcc$i") := io.port(i).completion.flags.asBits
setDefinitionName("FpuCore"+ (if(p.withDouble) "Double" else ""))
}).doSim(seed = 42){ dut =>
dut.clockDomain.forkStimulus(10)
@ -228,8 +228,10 @@ class FpuTest extends FunSuite{
val flagAggregated = dut.reflectBaseType(s"flagAcc$id").asInstanceOf[Bits]
dut.clockDomain.onSamplings{
val c = dut.io.port(id).completion
pendingMiaou -= c.count.toInt
if(c.valid.toBoolean) {
pendingMiaou -= 1
flagAccumulator |= flagAggregated.toInt
}
dut.writeback.randomSim.randomize()
}
@ -242,6 +244,9 @@ class FpuTest extends FunSuite{
StreamMonitor(dut.io.port(id)rsp, dut.clockDomain){payload =>
pendingMiaou -= 1
if(payload.NV.toBoolean) flagAccumulator |= 1 << 4
if(payload.NX.toBoolean) flagAccumulator |= 1 << 0
rspQueue.dequeue().apply(payload)
}