FPU: add the FCLASS opcode, replace FpuCmd.function with a 2-bit arg, extend the decoder table.

* FpuCore: FCLASS reads rs1 and is routed through the short pipeline (its result is a placeholder).
* Interface: FpuOpcode gains FCLASS; FpuCmd.function (3 bits) becomes FpuCmd.arg (2 bits).
* FpuPlugin: FPU_ALU is dropped, FPU_ARG is added, and the decoder table grows from 6 to 26
  instructions, built from shared encoding lists.

NOTE(review): line breaks and indentation were reconstructed from a whitespace-collapsed copy of
this patch. If context lines do not match the target files, apply it with
`git apply --ignore-whitespace` and verify the blank-line positions.

diff --git a/src/main/scala/vexriscv/ip/fpu/FpuCore.scala b/src/main/scala/vexriscv/ip/fpu/FpuCore.scala
index 56c0fab..047eaa3 100644
--- a/src/main/scala/vexriscv/ip/fpu/FpuCore.scala
+++ b/src/main/scala/vexriscv/ip/fpu/FpuCore.scala
@@ -185,7 +185,8 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
       is(p.Opcode.CMP) { useRs1 := True; useRs2 := True }
       is(p.Opcode.SGNJ) { useRd := True; useRs1 := True; useRs2 := True }
       is(p.Opcode.FMV_X_W) { useRs1 := True }
-      is(p.Opcode.FMV_W_X) { useRd := True}
+      is(p.Opcode.FMV_W_X) { useRd := True }
+      is(p.Opcode.FCLASS ) { useRs1 := True }
     }
 
     val hits = List((useRs1, s0.rs1), (useRs2, s0.rs2), (useRs3, s0.rs3), (useRd, s0.rd)).map{case (use, reg) => use && rf.lock.map(l => l.valid && l.source === s0.source && l.address === reg).orR}
@@ -230,7 +231,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
     input.ready setWhen(loadHit && load.ready)
     load.payload.assignSomeByName(read.output.payload)
 
-    val shortPipHit = List(FpuOpcode.STORE, FpuOpcode.F2I, FpuOpcode.CMP, FpuOpcode.I2F, FpuOpcode.MIN_MAX, FpuOpcode.SGNJ, FpuOpcode.FMV_X_W, FpuOpcode.FMV_W_X).map(input.opcode === _).orR
+    val shortPipHit = List(FpuOpcode.STORE, FpuOpcode.F2I, FpuOpcode.CMP, FpuOpcode.I2F, FpuOpcode.MIN_MAX, FpuOpcode.SGNJ, FpuOpcode.FMV_X_W, FpuOpcode.FMV_W_X, FpuOpcode.FCLASS).map(input.opcode === _).orR
     val shortPip = Stream(ShortPipInput())
     input.ready setWhen(shortPipHit && shortPip.ready)
     shortPip.valid := input.valid && shortPipHit
@@ -317,12 +318,14 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
 
     val minMaxResult = rs1Smaller ? input.rs1 | input.rs2
     val cmpResult = B(rs1Smaller)
+    val fclassResult = B(0) //TODO placeholder: classification is not implemented yet, FCLASS currently returns 0
 
     switch(input.opcode){
       is(FpuOpcode.STORE) { result := storeResult }
       is(FpuOpcode.F2I) { result := f2iResult }
       is(FpuOpcode.CMP) { result := cmpResult.resized } //TODO
       is(FpuOpcode.FMV_X_W) { result := input.rs1.asBits } //TODO
+      is(FpuOpcode.FCLASS) { result := fclassResult.resized }
     }
 
     val toFpuRf = List(FpuOpcode.MIN_MAX, FpuOpcode.I2F, FpuOpcode.SGNJ, FpuOpcode.FMV_W_X).map(input.opcode === _).orR
diff --git a/src/main/scala/vexriscv/ip/fpu/Interface.scala b/src/main/scala/vexriscv/ip/fpu/Interface.scala
index 255f9cb..1a0405d 100644
--- a/src/main/scala/vexriscv/ip/fpu/Interface.scala
+++ b/src/main/scala/vexriscv/ip/fpu/Interface.scala
@@ -31,7 +31,7 @@ case class FpuFloat(exponentSize: Int,
 }
 
 object FpuOpcode extends SpinalEnum{
-  val LOAD, STORE, MUL, ADD, FMA, I2F, F2I, CMP, DIV, SQRT, MIN_MAX, SGNJ, FMV_X_W, FMV_W_X = newElement()
+  val LOAD, STORE, MUL, ADD, FMA, I2F, F2I, CMP, DIV, SQRT, MIN_MAX, SGNJ, FMV_X_W, FMV_W_X, FCLASS = newElement()
 }
 
 object FpuFormat extends SpinalEnum{
@@ -64,7 +64,7 @@ case class FpuCompletion() extends Bundle{
 case class FpuCmd(p : FpuParameter) extends Bundle{
   val opcode = p.Opcode()
   val value = Bits(32 bits) // Int to float
-  val function = Bits(3 bits) // Int to float
+  val arg = Bits(2 bits) // Opcode-specific argument; FpuPlugin drives it from the decoded FPU_ARG
   val rs1, rs2, rs3 = p.rfAddress()
   val rd = p.rfAddress()
   val format = p.Format()
diff --git a/src/main/scala/vexriscv/plugin/FpuPlugin.scala b/src/main/scala/vexriscv/plugin/FpuPlugin.scala
index 70b73be..b19fa82 100644
--- a/src/main/scala/vexriscv/plugin/FpuPlugin.scala
+++ b/src/main/scala/vexriscv/plugin/FpuPlugin.scala
@@ -13,24 +13,96 @@ class FpuPlugin(externalFpu : Boolean = false,
   object FPU_COMMIT extends Stageable(Bool())
   object FPU_LOAD extends Stageable(Bool())
   object FPU_RSP extends Stageable(Bool())
-  object FPU_ALU extends Stageable(Bool())
   object FPU_FORKED extends Stageable(Bool())
   object FPU_OPCODE extends Stageable(FpuOpcode())
+  object FPU_ARG extends Stageable(Bits(2 bits)) // Decoded 2-bit argument, forwarded to port.cmd.arg
 
   var port : FpuPort = null
 
   override def setup(pipeline: VexRiscv): Unit = {
     import pipeline.config._
+    type ENC = (Stageable[_ <: BaseType],Any) // One decoder entry: (stageable, value)
+
+    val intRfWrite = List[ENC]( // Shared decoding for instructions that set REGFILE_WRITE_VALID and FPU_RSP
+      FPU_ENABLE -> True,
+      FPU_COMMIT -> False,
+      FPU_RSP -> True,
+      FPU_LOAD -> False,
+      REGFILE_WRITE_VALID -> True,
+      BYPASSABLE_EXECUTE_STAGE -> False,
+      BYPASSABLE_MEMORY_STAGE -> False
+    )
+
+    val floatRfWrite = List[ENC]( // Shared decoding for instructions that set FPU_COMMIT and clear FPU_RSP
+      FPU_ENABLE -> True,
+      FPU_COMMIT -> True,
+      FPU_RSP -> False,
+      FPU_LOAD -> False
+    )
+
+    val addSub = floatRfWrite :+ FPU_OPCODE -> FpuOpcode.ADD
+    val mul = floatRfWrite :+ FPU_OPCODE -> FpuOpcode.MUL
+    val fma = floatRfWrite :+ FPU_OPCODE -> FpuOpcode.FMA
+    val div = floatRfWrite :+ FPU_OPCODE -> FpuOpcode.DIV
+    val sqrt = floatRfWrite :+ FPU_OPCODE -> FpuOpcode.SQRT
+    val fsgnj = floatRfWrite :+ FPU_OPCODE -> FpuOpcode.SGNJ
+    val fminMax = floatRfWrite :+ FPU_OPCODE -> FpuOpcode.MIN_MAX
+    val fmvWx = floatRfWrite :+ FPU_OPCODE -> FpuOpcode.FMV_W_X :+ RS1_USE -> True
+    val fcvtI2f = floatRfWrite :+ FPU_OPCODE -> FpuOpcode.I2F :+ RS1_USE -> True
+
+    val fcmp = intRfWrite :+ FPU_OPCODE -> FpuOpcode.CMP
+    val fclass = intRfWrite :+ FPU_OPCODE -> FpuOpcode.FCLASS
+    val fmvXw = intRfWrite :+ FPU_OPCODE -> FpuOpcode.FMV_X_W
+    val fcvtF2i = intRfWrite :+ FPU_OPCODE -> FpuOpcode.F2I
+
+    val fl = List[ENC](
+      FPU_ENABLE -> True,
+      FPU_OPCODE -> FpuOpcode.LOAD,
+      FPU_COMMIT -> True,
+      FPU_LOAD -> True,
+      FPU_RSP -> False
+    )
+
+    val fs = List[ENC](
+      FPU_ENABLE -> True,
+      FPU_OPCODE -> FpuOpcode.STORE,
+      FPU_COMMIT -> False,
+      FPU_LOAD -> False,
+      FPU_RSP -> True
+    )
+
+
+    def arg(v : Int) = FPU_ARG -> B(v, 2 bits) // Bits literal, matching FPU_ARG's Stageable(Bits(2 bits))
 
     val decoderService = pipeline.service(classOf[DecoderService])
     decoderService.addDefault(FPU_ENABLE, False)
     decoderService.add(List(
-      FADD_S -> List(FPU_ENABLE -> True, FPU_OPCODE -> FpuOpcode.ADD, FPU_COMMIT -> True, FPU_ALU -> True , FPU_LOAD -> False, FPU_RSP -> False),
-      FLW -> List(FPU_ENABLE -> True, FPU_OPCODE -> FpuOpcode.LOAD, FPU_COMMIT -> True, FPU_ALU -> False, FPU_LOAD -> True , FPU_RSP -> False),
-      FCVT_S_WU -> List(FPU_ENABLE -> True, FPU_OPCODE -> FpuOpcode.I2F , FPU_COMMIT -> True , FPU_ALU -> True, FPU_LOAD -> False, FPU_RSP -> False, RS1_USE -> True),
-      FSW -> List(FPU_ENABLE -> True, FPU_OPCODE -> FpuOpcode.STORE, FPU_COMMIT -> False, FPU_ALU -> False, FPU_LOAD -> False, FPU_RSP -> True),
-      FCVT_WU_S -> List(FPU_ENABLE -> True, FPU_OPCODE -> FpuOpcode.F2I , FPU_COMMIT -> False, FPU_ALU -> False, FPU_LOAD -> False, FPU_RSP -> True, REGFILE_WRITE_VALID -> True, BYPASSABLE_EXECUTE_STAGE -> False, BYPASSABLE_MEMORY_STAGE -> False),
-      FLE_S -> List(FPU_ENABLE -> True, FPU_OPCODE -> FpuOpcode.CMP , FPU_COMMIT -> False, FPU_ALU -> False, FPU_LOAD -> False, FPU_RSP -> True, REGFILE_WRITE_VALID -> True, BYPASSABLE_EXECUTE_STAGE -> False, BYPASSABLE_MEMORY_STAGE -> False)
+      FADD_S -> (addSub :+ arg(0)),
+      FSUB_S -> (addSub :+ arg(1)),
+      FMADD_S -> (fma :+ arg(0)),
+      FMSUB_S -> (fma :+ arg(1)),
+      FNMSUB_S -> (fma :+ arg(2)),
+      FNMADD_S -> (fma :+ arg(3)),
+      FMUL_S -> (mul),
+      FDIV_S -> (div),
+      FSQRT_S -> (sqrt),
+      FLW -> (fl),
+      FSW -> (fs),
+      FCVT_S_WU -> (fcvtI2f :+ arg(0)),
+      FCVT_S_W -> (fcvtI2f :+ arg(1)),
+      FCVT_WU_S -> (fcvtF2i :+ arg(0)),
+      FCVT_W_S -> (fcvtF2i :+ arg(1)),
+      FCLASS_S -> (fclass),
+      FLE_S -> (fcmp :+ arg(0)),
+      FEQ_S -> (fcmp :+ arg(1)),
+      FLT_S -> (fcmp :+ arg(2)),
+      FSGNJ_S -> (fsgnj :+ arg(0)),
+      FSGNJN_S -> (fsgnj :+ arg(1)),
+      FSGNJX_S -> (fsgnj :+ arg(2)),
+      FMIN_S -> (fminMax :+ arg(0)),
+      FMAX_S -> (fminMax :+ arg(1)),
+      FMV_X_W -> (fmvXw),
+      FMV_W_X -> (fmvWx)
     ))
 
     port = FpuPort(p)
@@ -92,8 +164,8 @@ class FpuPlugin(externalFpu : Boolean = false,
       //Maybe it might be better to not fork before fire to avoid RF stall on commits
       val forked = Reg(Bool) setWhen(port.cmd.fire) clearWhen(!arbitration.isStuck) init(False)
 
-      val i2fReady = Reg(Bool()) setWhen(!arbitration.isStuckByOthers) clearWhen(!arbitration.isStuck)
-      val hazard = input(FPU_OPCODE) === FpuOpcode.I2F && !i2fReady || csr.pendings.msb || csr.csrActive
+      val intRfReady = Reg(Bool()) setWhen(!arbitration.isStuckByOthers) clearWhen(!arbitration.isStuck)
+      val hazard = (input(RS1_USE) && !intRfReady) || csr.pendings.msb || csr.csrActive // Gated on RS1_USE instead of the I2F opcode
 
       arbitration.haltItself setWhen(arbitration.isValid && input(FPU_ENABLE) && hazard)
       arbitration.haltItself setWhen(port.cmd.isStall)
@@ -101,7 +173,7 @@ class FpuPlugin(externalFpu : Boolean = false,
       port.cmd.valid := arbitration.isValid && input(FPU_ENABLE) && !forked && !hazard
       port.cmd.opcode := input(FPU_OPCODE)
       port.cmd.value := RegNext(output(RS1))
-      port.cmd.function := 0
+      port.cmd.arg := input(FPU_ARG)
       port.cmd.rs1 := input(INSTRUCTION)(rs1Range).asUInt
       port.cmd.rs2 := input(INSTRUCTION)(rs2Range).asUInt
       port.cmd.rs3 := input(INSTRUCTION)(rs3Range).asUInt