diff --git a/src/main/scala/vexriscv/ip/fpu/FpuCore.scala b/src/main/scala/vexriscv/ip/fpu/FpuCore.scala index 33a792e..88fa0e0 100644 --- a/src/main/scala/vexriscv/ip/fpu/FpuCore.scala +++ b/src/main/scala/vexriscv/ip/fpu/FpuCore.scala @@ -227,13 +227,13 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ val input = read.output.combStage() input.ready := False - val loadHit = input.opcode === p.Opcode.LOAD + val loadHit = List(FpuOpcode.LOAD, FpuOpcode.FMV_W_X).map(input.opcode === _).orR val load = Stream(LoadInput()) load.valid := input.valid && loadHit input.ready setWhen(loadHit && load.ready) load.payload.assignSomeByName(read.output.payload) - val shortPipHit = List(FpuOpcode.STORE, FpuOpcode.F2I, FpuOpcode.CMP, FpuOpcode.I2F, FpuOpcode.MIN_MAX, FpuOpcode.SGNJ, FpuOpcode.FMV_X_W, FpuOpcode.FMV_W_X, FpuOpcode.FCLASS).map(input.opcode === _).orR + val shortPipHit = List(FpuOpcode.STORE, FpuOpcode.F2I, FpuOpcode.CMP, FpuOpcode.I2F, FpuOpcode.MIN_MAX, FpuOpcode.SGNJ, FpuOpcode.FMV_X_W, FpuOpcode.FCLASS).map(input.opcode === _).orR val shortPip = Stream(ShortPipInput()) input.ready setWhen(shortPipHit && shortPip.ready) shortPip.valid := input.valid && shortPipHit @@ -283,43 +283,99 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ } val load = new Area{ - val input = decode.load.stage() - val filtred = commitFork.load.map(port => port.takeWhen(port.load)) - def feed = filtred(input.source) - val hazard = !feed.valid - val f32Mantissa = feed.value(0, 23 bits).asUInt - val f32Exponent = feed.value(23, 8 bits).asUInt - val f32Sign = feed.value(31) + case class S0() extends Bundle{ + val source = Source() + val lockId = lockIdType() + val rd = p.rfAddress() + val value = FpuFloat(exponentSize = p.internalExponentSize-1, mantissaSize = p.internalMantissaSize) + } - val expZero = f32Exponent === 0 - val expOne = f32Exponent === 255 - val manZero = f32Mantissa === 0 + val s0 = new Area{ + val input = decode.load.stage() + val filtred = commitFork.load.map(port => port.takeWhen(port.sync)) + def feed = filtred(input.source) + val hazard = !feed.valid - val isZero = expZero && manZero - val isSubnormal = expZero && !manZero - val isNormal = !expOne && !expZero - val isInfinity = expOne && manZero - val isNan = expOne && !manZero - val isQuiet = f32Mantissa.msb + val output = input.haltWhen(hazard).swapPayload(S0()) + filtred.foreach(_.ready := False) + feed.ready := input.valid && output.ready + output.source := input.source + output.lockId := input.lockId + output.rd := input.rd + output.value.mantissa := feed.value(0, 23 bits).asUInt + output.value.exponent := feed.value(23, 8 bits).asUInt + output.value.sign := feed.value(31) + } - val recoded = p.internalFloating() - recoded.mantissa := f32Mantissa - recoded.exponent := f32Exponent - recoded.sign := f32Sign - recoded.setNormal - when(isZero){recoded.setZero} - when(isSubnormal){recoded.setSubnormal} - when(isInfinity){recoded.setInfinity} - when(isNan){recoded.setNan} + val s1 = new Area{ + val input = s0.output.stage() + val busy = False - val output = input.haltWhen(hazard).swapPayload(WriteInput()) - filtred.foreach(_.ready := False) - feed.ready := input.valid && output.ready - output.source := input.source - output.lockId := input.lockId - output.rd := input.rd - output.value := recoded + val f32Mantissa = input.value.mantissa + val f32Exponent = input.value.exponent + val f32Sign = input.value.sign + + val expZero = f32Exponent === 0 + val expOne = f32Exponent === 255 + val manZeroReject = Reg(Bool()) setWhen(busy) clearWhen(!input.isStall) + val manZero = f32Mantissa === 0 && !manZeroReject + + val isZero = expZero && manZero + val isSubnormal = expZero && !manZero + val isNormal = !expOne && !expZero + val isInfinity = expOne && manZero + val isNan = expOne && !manZero + val isQuiet = f32Mantissa.msb + + val subnormal = new Area{ + val manTop = Reg(UInt(log2Up(p.internalMantissaSize) bits)) + val shift = isSubnormal ? manTop | U(0) + val counter = Reg(UInt(log2Up(p.internalMantissaSize+1) bits)) + val done, boot = Reg(Bool()) + when(isSubnormal && !done){ + busy := True + when(boot){ + manTop := OHToUInt(OHMasking.first((f32Mantissa).reversed)) + boot := False + } otherwise { + input.value.mantissa.getDrivingReg := input.value.mantissa |<< 1 + counter := counter + 1 + when(counter === shift) { + done := True + } + } + } + + val expOffset = (UInt(p.internalExponentSize bits)) + expOffset := 0 + when(isSubnormal){ + expOffset := manTop.resized + } + + when(!input.isStall){ + counter := 0 + done := False + boot := True + } + } + + val recoded = p.internalFloating() + recoded.mantissa := f32Mantissa + recoded.exponent := (f32Exponent -^ subnormal.expOffset + (exponentOne - 127)).resized + recoded.sign := f32Sign + recoded.setNormal + when(isZero){recoded.setZero} + //when(isSubnormal){recoded.setSubnormal} + when(isInfinity){recoded.setInfinity} + when(isNan){recoded.setNan} + + val output = input.haltWhen(busy).swapPayload(WriteInput()) + output.source := input.source + output.lockId := input.lockId + output.rd := input.rd + output.value := recoded + } } val shortPip = new Area{ @@ -330,23 +386,62 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ val result = p.storeLoadType().assignDontCare() val recoded = CombInit(input.rs1) + + + val halt = False + val recodedResult = Bits(32 bits)//recoded.asBits.resize(32 bits) + val f32 = new Area{ + val exp = (recoded.exponent - (exponentOne-127)).resize(8 bits) + val man = recoded.mantissa + } + recodedResult := recoded.sign ## f32.exp ## f32.man + + val subnormal = new Area{ + val isSubnormal = !recoded.special && recoded.exponent <= exponentOne - 127 + val manTop = Reg(UInt(log2Up(p.internalMantissaSize) bits)) + val shift = isSubnormal ? manTop | U(0) + val counter = Reg(UInt(log2Up(p.internalMantissaSize+1) bits)) + val done, boot = Reg(Bool()) + when(isSubnormal && !done){ + halt := True + when(boot){ + manTop := (U(exponentOne - 127) - recoded.exponent).resized + boot := False + } otherwise { + recoded.mantissa.getDrivingReg := (U(counter === 0) @@ recoded.mantissa) >> 1 + counter := counter + 1 + when(counter === shift) { + done := True + } + } + } + + when(isSubnormal){ + f32.exp := 0 + } + when(!input.isStall){ + counter := 0 + done := False + boot := True + } + } + when(recoded.special){ switch(input.rs1.exponent(1 downto 0)){ is(FpuFloat.ZERO){ - recoded.mantissa.clearAll() - recoded.exponent.clearAll() + recodedResult(0,23 bits).clearAll() + recodedResult(23, 8 bits).clearAll() } is(FpuFloat.INFINITY){ - recoded.mantissa.clearAll() - recoded.exponent.setAll() + recodedResult(0, 23 bits).clearAll() + recodedResult(23, 8 bits).setAll() } is(FpuFloat.NAN){ - recoded.exponent.setAll() + recodedResult(23, 8 bits).setAll() } } } - val recodedResult = recoded.asBits.resize(32 bits) val f2iShift = input.rs1.exponent - U(exponentOne) val f2iShifted = (U"1" @@ input.rs1.mantissa) << (f2iShift.resize(5 bits)) @@ -367,32 +462,33 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ 3 -> (!rs1AbsSmaller && !rs1Equal) ) - val rawToFpu = new Area{ - val f32Mantissa = input.value(0, 23 bits).asUInt - val f32Exponent = input.value(23, 8 bits).asUInt - val f32Sign = input.value(31) - - val expZero = f32Exponent === 0 - val expOne = f32Exponent === 255 - val manZero = f32Mantissa === 0 - - val isZero = expZero && manZero - val isSubnormal = expZero && !manZero - val isNormal = !expOne && !expZero - val isInfinity = expOne && manZero - val isNan = expOne && !manZero - val isQuiet = f32Mantissa.msb - - val recoded = p.internalFloating() - recoded.mantissa := f32Mantissa - recoded.exponent := f32Exponent - recoded.sign := f32Sign - recoded.setNormal - when(isZero){recoded.setZero} - when(isSubnormal){recoded.setSubnormal} - when(isInfinity){recoded.setInfinity} - when(isNan){recoded.setNan} - } + //TODO +// val rawToFpu = new Area{ +// val f32Mantissa = input.value(0, 23 bits).asUInt +// val f32Exponent = input.value(23, 8 bits).asUInt +// val f32Sign = input.value(31) +// +// val expZero = f32Exponent === 0 +// val expOne = f32Exponent === 255 +// val manZero = f32Mantissa === 0 +// +// val isZero = expZero && manZero +// val isSubnormal = expZero && !manZero +// val isNormal = !expOne && !expZero +// val isInfinity = expOne && manZero +// val isNan = expOne && !manZero +// val isQuiet = f32Mantissa.msb +// +// val recoded = p.internalFloating() +// recoded.mantissa := f32Mantissa +// recoded.exponent := f32Exponent +// recoded.sign := f32Sign +// recoded.setNormal +// when(isZero){recoded.setZero} +// when(isSubnormal){recoded.setSubnormal} +// when(isInfinity){recoded.setInfinity} +// when(isNan){recoded.setNan} +// } val minMaxResult = (rs1Smaller ^ input.arg(0)) ? input.rs1 | input.rs2 val cmpResult = B(rs1Smaller && !input.arg(1) || rs1Equal && !input.arg(0)) @@ -401,10 +497,10 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ val decoded = input.rs1.decode() fclassResult(0) := input.rs1.sign && decoded.isInfinity fclassResult(1) := input.rs1.sign && decoded.isNormal - fclassResult(2) := input.rs1.sign && decoded.isSubnormal +// fclassResult(2) := input.rs1.sign && decoded.isSubnormal //TODO fclassResult(3) := input.rs1.sign && decoded.isZero fclassResult(4) := !input.rs1.sign && decoded.isZero - fclassResult(5) := !input.rs1.sign && decoded.isSubnormal +// fclassResult(5) := !input.rs1.sign && decoded.isSubnormal //TODO fclassResult(6) := !input.rs1.sign && decoded.isNormal fclassResult(7) := !input.rs1.sign && decoded.isInfinity fclassResult(8) := decoded.isNan && !decoded.isQuiet @@ -419,9 +515,9 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ is(FpuOpcode.FCLASS) { result := fclassResult.resized } } - val toFpuRf = List(FpuOpcode.MIN_MAX, FpuOpcode.I2F, FpuOpcode.SGNJ, FpuOpcode.FMV_W_X).map(input.opcode === _).orR + val toFpuRf = List(FpuOpcode.MIN_MAX, FpuOpcode.I2F, FpuOpcode.SGNJ).map(input.opcode === _).orR - rfOutput.valid := input.valid && toFpuRf + rfOutput.valid := input.valid && toFpuRf && !halt rfOutput.source := input.source rfOutput.lockId := input.lockId rfOutput.rd := input.rd @@ -442,15 +538,12 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ rfOutput.value.mantissa := input.rs1.mantissa rfOutput.value.special := False //TODO } - is(FpuOpcode.FMV_W_X){ - rfOutput.value := rawToFpu.recoded - } } - input.ready := (toFpuRf ? rfOutput.ready | io.port.map(_.rsp.ready).read(input.source)) + input.ready := !halt && (toFpuRf ? rfOutput.ready | io.port.map(_.rsp.ready).read(input.source)) for(i <- 0 until portCount){ def rsp = io.port(i).rsp - rsp.valid := input.valid && input.source === i && !toFpuRf + rsp.valid := input.valid && input.source === i && !toFpuRf && !halt rsp.value := result completion(i).increments += (RegNext(rsp.fire) init(False)) } @@ -463,7 +556,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ val mulA = U(input.msb1) @@ input.rs1.mantissa val mulB = U(input.msb2) @@ input.rs2.mantissa val mulC = mulA * mulB - val expOffset = ((1 << p.internalExponentSize - 1) - 1) val exp = input.rs1.exponent +^ input.rs2.exponent } @@ -478,13 +570,13 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ val man = needShift ? mulRounded(1, p.internalMantissaSize bits) | mulRounded(0, p.internalMantissaSize bits) val forceZero = input.rs1.isZeroOrSubnormal || input.rs2.isZeroOrSubnormal - val forceUnderflow = exp <= math.expOffset - val forceOverflow = exp > math.expOffset+254 || input.rs1.isInfinity || input.rs2.isInfinity + val forceUnderflow = exp <= exponentOne + exponentOne - 127 - 23 // 0x6A //TODO + val forceOverflow = exp > exponentOne + exponentOne + 127 || input.rs1.isInfinity || input.rs2.isInfinity val forceNan = input.rs1.isNan || input.rs2.isNan || ((input.rs1.isInfinity || input.rs2.isInfinity) && (input.rs1.isZero || input.rs2.isZero)) val output = FpuFloat(p.internalExponentSize, p.internalMantissaSize) output.sign := input.rs1.sign ^ input.rs2.sign - output.exponent := (exp - math.expOffset).resized + output.exponent := (exp - exponentOne).resized output.mantissa := man output.setNormal @@ -702,7 +794,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ val shifter = new Area { val exp21 = input.rs2.exponent -^ input.rs1.exponent - val rs1ExponentBigger = exp21.msb || input.rs2.isZeroOrSubnormal + val rs1ExponentBigger = (exp21.msb || input.rs2.isZeroOrSubnormal) && !input.rs1.isZeroOrSubnormal val rs1ExponentEqual = input.rs1.exponent === input.rs2.exponent val rs1MantissaBigger = input.rs1.mantissa > input.rs2.mantissa val absRs1Bigger = ((rs1ExponentBigger || rs1ExponentEqual && rs1MantissaBigger) && !input.rs1.isZeroOrSubnormal || input.rs1.isInfinity) && !input.rs2.isInfinity @@ -746,7 +838,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ val exponent = xyExponent -^ shift + 1 xySign clearWhen(input.rs1.isZeroOrSubnormal && input.rs2.isZeroOrSubnormal) val forceZero = xyMantissa === 0 || exponent.msb || (input.rs1.isZeroOrSubnormal && input.rs2.isZeroOrSubnormal) - val forceOverflow = exponent(7 downto 0) === 255 || (input.rs1.isInfinity || input.rs2.isInfinity) + val forceOverflow = exponent === exponentOne + 128 || (input.rs1.isInfinity || input.rs2.isInfinity) val forceNan = input.rs1.isNan || input.rs2.isNan || (input.rs1.isInfinity && input.rs2.isInfinity && (input.rs1.sign ^ input.rs2.sign)) } @@ -773,8 +865,28 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ } +// val format = new Area{ +// val input = pipeArbiter.arbitrated.combStage() +// +// val rotate = new Area{ +// val input = Bits(p.internalMantissaSize bits) +// val shift = UInt(log2Up(p.internalMantissaSize) bits) +// val output = input.rotateLeft(shift) +// } +// +// val decode = new Area{ +// val sign = input.raw(31) +// val exp = input.raw(23, 8 bits).asUInt +// val man = input.raw(23, 8 bits).asUInt +// val isSubnormal = exp === 0 //zero ? +// val manTop = OHToUInt(OHMasking.first((man ## U"1").reversed)) +// val shift = isSubnormal ? manTop | U(0) +// rotate.shift := shift +// } +// } + val write = new Area{ - val arbitrated = StreamArbiterFactory.lowerFirst.noLock.on(List(load.output, add.output, mul.output, shortPip.rfOutput)) + val arbitrated = StreamArbiterFactory.lowerFirst.noLock.on(List(load.s1.output, add.output, mul.output, shortPip.rfOutput)) val isCommited = rf.lock.map(_.commited).read(arbitrated.lockId) val commited = arbitrated.haltWhen(!isCommited).toFlow @@ -794,8 +906,8 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ port.data := commited.value when(port.valid){ - assert(!(port.data.exponent === 0 && !port.data.special)) - assert(!(port.data.exponent === port.data.exponent.maxValue && !port.data.special)) + assert(!(port.data.exponent === 0 && !port.data.special), "Special violation") + assert(!(port.data.exponent === port.data.exponent.maxValue && !port.data.special), "Special violation") } } } @@ -831,11 +943,47 @@ object FpuSynthesisBench extends App{ SpinalVerilog(new Component{ val a = in UInt(width bits) val sel = in UInt(log2Up(width) bits) - val result = out(a.rotateLeft(sel)) + val result = out(Delay(Delay(a,3).rotateLeft(Delay(sel,3)),3)) setDefinitionName(Rotate.this.getName()) }) } +// rotate2_24 -> +// Artix 7 -> 233 Mhz 96 LUT 167 FF +// Artix 7 -> 420 Mhz 86 LUT 229 FF +// rotate2_32 -> +// Artix 7 -> 222 Mhz 108 LUT 238 FF +// Artix 7 -> 399 Mhz 110 LUT 300 FF +// rotate2_52 -> +// Artix 7 -> 195 Mhz 230 LUT 362 FF +// Artix 7 -> 366 Mhz 225 LUT 486 FF +// rotate2_64 -> +// Artix 7 -> 182 Mhz 257 LUT 465 FF +// Artix 7 -> 359 Mhz 266 LUT 591 FF + class Rotate2(width : Int) extends Rtl{ + override def getName(): String = "rotate2_" + width + override def getRtlPath(): String = getName() + ".v" + SpinalVerilog(new Component{ + val a = in UInt(width bits) + val sel = in UInt(log2Up(width) bits) + val result = out(Delay((U(0, width bits) @@ Delay(a,3)).rotateLeft(Delay(sel,3)),3)) + setDefinitionName(Rotate2.this.getName()) + }) + } + + class Rotate3(width : Int) extends Rtl{ + override def getName(): String = "rotate3_" + width + override def getRtlPath(): String = getName() + ".v" + SpinalVerilog(new Component{ + val a = Delay(in UInt(width bits), 3) + val sel = Delay(in UInt(log2Up(width) bits),3) +// val result = +// val output = Delay(result, 3) + setDefinitionName(Rotate3.this.getName()) + }) + } + + val rtls = ArrayBuffer[Rtl]() // rtls += new Fpu( // "32", @@ -858,10 +1006,14 @@ object FpuSynthesisBench extends App{ // rtls += new Shifter(32) // rtls += new Shifter(52) // rtls += new Shifter(64) - rtls += new Rotate(24) - rtls += new Rotate(32) - rtls += new Rotate(52) - rtls += new Rotate(64) +// rtls += new Rotate(24) +// rtls += new Rotate(32) +// rtls += new Rotate(52) +// rtls += new Rotate(64) + rtls += new Rotate3(24) + rtls += new Rotate3(32) + rtls += new Rotate3(52) + rtls += new Rotate3(64) val targets = XilinxStdTargets()// ++ AlteraStdTargets() diff --git a/src/main/scala/vexriscv/ip/fpu/Interface.scala b/src/main/scala/vexriscv/ip/fpu/Interface.scala index 42c9bcb..5389e77 100644 --- a/src/main/scala/vexriscv/ip/fpu/Interface.scala +++ b/src/main/scala/vexriscv/ip/fpu/Interface.scala @@ -50,14 +50,14 @@ case class FpuFloat(exponentSize: Int, def isNormal = !special def isZero = special && exponent(1 downto 0) === 0 - def isSubnormal = special && exponent(1 downto 0) === 1 + //def isSubnormal = special && exponent(1 downto 0) === 1 def isInfinity = special && exponent(1 downto 0) === 2 def isNan = special && exponent(1 downto 0) === 3 def isQuiet = mantissa.msb def setNormal = { special := False } def setZero = { special := True; exponent(1 downto 0) := 0 } - def setSubnormal = { special := True; exponent(1 downto 0) := 1 } + //def setSubnormal = { special := True; exponent(1 downto 0) := 1 } def setInfinity = { special := True; exponent(1 downto 0) := 2 } def setNan = { special := True; exponent(1 downto 0) := 3 } def setNanQuiet = { special := True; exponent(1 downto 0) := 3; mantissa.msb := True } @@ -65,7 +65,7 @@ case class FpuFloat(exponentSize: Int, def decode() = { val ret = FpuFloatDecoded() ret.isZero := isZero - ret.isSubnormal := isSubnormal + //ret.isSubnormal := isSubnormal ret.isNormal := isNormal ret.isInfinity := isInfinity ret.isNan := isNan @@ -101,7 +101,7 @@ case class FpuParameter( internalMantissaSize : Int, withDouble : Boolean){ val storeLoadType = HardType(Bits(if(withDouble) 64 bits else 32 bits)) - val internalExponentSize = if(withDouble) 11 else 8 + val internalExponentSize = (if(withDouble) 11 else 8) + 1 val internalFloating = HardType(FpuFloat(exponentSize = internalExponentSize, mantissaSize = internalMantissaSize)) val rfAddress = HardType(UInt(5 bits)) @@ -132,7 +132,7 @@ case class FpuCmd(p : FpuParameter) extends Bundle{ case class FpuCommit(p : FpuParameter) extends Bundle{ val write = Bool() - val load = Bool() + val sync = Bool() val value = p.storeLoadType() // IEEE 754 } diff --git a/src/main/scala/vexriscv/plugin/FpuPlugin.scala b/src/main/scala/vexriscv/plugin/FpuPlugin.scala index 4d639ef..258c029 100644 --- a/src/main/scala/vexriscv/plugin/FpuPlugin.scala +++ b/src/main/scala/vexriscv/plugin/FpuPlugin.scala @@ -11,7 +11,8 @@ class FpuPlugin(externalFpu : Boolean = false, object FPU_ENABLE extends Stageable(Bool()) object FPU_COMMIT extends Stageable(Bool()) - object FPU_LOAD extends Stageable(Bool()) + object FPU_COMMIT_SYNC extends Stageable(Bool()) + object FPU_COMMIT_LOAD extends Stageable(Bool()) object FPU_RSP extends Stageable(Bool()) object FPU_FORKED extends Stageable(Bool()) object FPU_OPCODE extends Stageable(FpuOpcode()) @@ -28,7 +29,6 @@ class FpuPlugin(externalFpu : Boolean = false, FPU_ENABLE -> True, FPU_COMMIT -> False, FPU_RSP -> True, - FPU_LOAD -> False, REGFILE_WRITE_VALID -> True, BYPASSABLE_EXECUTE_STAGE -> False, BYPASSABLE_MEMORY_STAGE -> False @@ -37,8 +37,7 @@ class FpuPlugin(externalFpu : Boolean = false, val floatRfWrite = List[ENC]( FPU_ENABLE -> True, FPU_COMMIT -> True, - FPU_RSP -> False, - FPU_LOAD -> False + FPU_RSP -> False ) val addSub = floatRfWrite :+ FPU_OPCODE -> FpuOpcode.ADD @@ -60,7 +59,6 @@ class FpuPlugin(externalFpu : Boolean = false, FPU_ENABLE -> True, FPU_OPCODE -> FpuOpcode.LOAD, FPU_COMMIT -> True, - FPU_LOAD -> True, FPU_RSP -> False ) @@ -68,7 +66,6 @@ class FpuPlugin(externalFpu : Boolean = false, FPU_ENABLE -> True, FPU_OPCODE -> FpuOpcode.STORE, FPU_COMMIT -> False, - FPU_LOAD -> False, FPU_RSP -> True ) @@ -164,7 +161,7 @@ class FpuPlugin(externalFpu : Boolean = false, //Maybe it might be better to not fork before fire to avoid RF stall on commits val forked = Reg(Bool) setWhen(port.cmd.fire) clearWhen(!arbitration.isStuck) init(False) - val intRfReady = Reg(Bool()) setWhen(!arbitration.isStuckByOthers) clearWhen(!arbitration.isStuck) + val intRfReady = Reg(Bool()) setWhen(!arbitration.isStuckByOthers) clearWhen(!arbitration.isStuck) //TODO is that still in use ? val hazard = (input(RS1_USE) && !intRfReady) || csr.pendings.msb || csr.csrActive arbitration.haltItself setWhen(arbitration.isValid && input(FPU_ENABLE) && hazard) @@ -181,6 +178,9 @@ class FpuPlugin(externalFpu : Boolean = false, port.cmd.format := FpuFormat.FLOAT insert(FPU_FORKED) := forked || port.cmd.fire + + insert(FPU_COMMIT_SYNC) := List(FpuOpcode.LOAD, FpuOpcode.FMV_W_X).map(_ === input(FPU_OPCODE)).orR + insert(FPU_COMMIT_LOAD) := input(FPU_OPCODE) === FpuOpcode.LOAD } writeBack plug new Area{ @@ -206,9 +206,9 @@ class FpuPlugin(externalFpu : Boolean = false, // Manage $load val commit = Stream(FpuCommit(p)) commit.valid := isCommit && arbitration.isMoving - commit.value.assignFromBits(output(DBUS_DATA)) + commit.value := (input(FPU_COMMIT_LOAD) ? output(DBUS_DATA) | input(RS1)) commit.write := arbitration.isValid - commit.load := input(FPU_LOAD) + commit.sync := input(FPU_COMMIT_SYNC) when(arbitration.isValid && !commit.ready){ arbitration.haltByOther := True diff --git a/src/test/scala/vexriscv/ip/fpu/FpuTest.scala b/src/test/scala/vexriscv/ip/fpu/FpuTest.scala index 17dafff..b89d193 100644 --- a/src/test/scala/vexriscv/ip/fpu/FpuTest.scala +++ b/src/test/scala/vexriscv/ip/fpu/FpuTest.scala @@ -17,7 +17,7 @@ class FpuTest extends FunSuite{ val b2f = lang.Float.intBitsToFloat(_) val f2b = lang.Float.floatToIntBits(_) def clamp(f : Float) = { - if(f.abs < b2f(0x00800000)) b2f(f2b(f) & 0x80000000) else f + f // if(f.abs < b2f(0x00800000)) b2f(f2b(f) & 0x80000000) else f } test("directed"){ @@ -74,7 +74,7 @@ class FpuTest extends FunSuite{ commitQueue += {cmd => cmd.write #= true cmd.value #= value - cmd.load #= true + cmd.sync #= true } } @@ -112,7 +112,7 @@ class FpuTest extends FunSuite{ } commitQueue += {cmd => cmd.write #= true - cmd.load #= false + cmd.sync #= false } } @@ -128,7 +128,7 @@ class FpuTest extends FunSuite{ } commitQueue += {cmd => cmd.write #= true - cmd.load #= false + cmd.sync #= false } } @@ -144,7 +144,7 @@ class FpuTest extends FunSuite{ } commitQueue += {cmd => cmd.write #= true - cmd.load #= false + cmd.sync #= false } } @@ -160,7 +160,7 @@ class FpuTest extends FunSuite{ } commitQueue += {cmd => cmd.write #= true - cmd.load #= false + cmd.sync #= false } } @@ -176,7 +176,7 @@ class FpuTest extends FunSuite{ } commitQueue += {cmd => cmd.write #= true - cmd.load #= false + cmd.sync #= false } } @@ -219,7 +219,7 @@ class FpuTest extends FunSuite{ } commitQueue += {cmd => cmd.write #= true - cmd.load #= false + cmd.sync #= false } } @@ -239,7 +239,7 @@ class FpuTest extends FunSuite{ def fmv_w_x(rd : Int, value : Int): Unit ={ cmdQueue += {cmd => cmd.opcode #= cmd.opcode.spinalEnum.FMV_W_X - cmd.value #= value.toLong & 0xFFFFFFFFl + cmd.value.randomize() cmd.rs1.randomize() cmd.rs2.randomize() cmd.rs3.randomize() @@ -248,7 +248,8 @@ class FpuTest extends FunSuite{ } commitQueue += {cmd => cmd.write #= true - cmd.load #= false + cmd.sync #= true + cmd.value #= value.toLong & 0xFFFFFFFFl } } @@ -264,7 +265,7 @@ class FpuTest extends FunSuite{ } commitQueue += {cmd => cmd.write #= true - cmd.load #= false + cmd.sync #= false } } @@ -281,7 +282,7 @@ class FpuTest extends FunSuite{ } commitQueue += {cmd => cmd.write #= true - cmd.load #= false + cmd.sync #= false } } } @@ -309,11 +310,12 @@ class FpuTest extends FunSuite{ } } def checkFloat(ref : Float, dut : Float): Boolean ={ - if(ref.signum != dut.signum) return false + if((f2b(ref) & 0x80000000) != (f2b(dut) & 0x80000000)) return false if(ref == 0.0 && dut == 0.0 && f2b(ref) != f2b(dut)) return false if(ref.isNaN && dut.isNaN) return true if(ref == dut) return true - if(ref.abs * 1.0001 > dut.abs && ref.abs * 0.9999 < dut.abs && ref.signum == dut.signum) return true + if(ref.abs * 1.0001 + Float.MinPositiveValue >= dut.abs && ref.abs * 0.9999 - Float.MinPositiveValue <= dut.abs) return true +// if(ref + Float.MinPositiveValue*2.0f === dut || dut + Float.MinPositiveValue*2.0f === ref) false } def checkFloatExact(ref : Float, dut : Float): Boolean ={ @@ -346,6 +348,16 @@ class FpuTest extends FunSuite{ } } + def testLoadStore(a : Float): Unit ={ + val rd = Random.nextInt(32) + load(rd, a) + storeFloat(rd){v => + val refUnclamped = a + val ref = a + println(f"$a = $v, $ref") + assert(f2b(v) == f2b(ref)) + } + } def testMul(a : Float, b : Float): Unit ={ val rs = new RegAllocator() val rs1, rs2, rs3 = rs.allocate() @@ -515,7 +527,7 @@ class FpuTest extends FunSuite{ def withMinus(that : Seq[Float]) = that.flatMap(f => List(f, -f)) val fZeros = withMinus(List(0.0f)) - val fSubnormals = withMinus(List(b2f(0x00000000+1), b2f(0x00000000+2), b2f(0x00800000-2), b2f(0x00800000-1))) + val fSubnormals = withMinus(List(b2f(0x00000000+1), b2f(0x00000000+2), b2f(0x00006800), b2f(0x00800000-2), b2f(0x00800000-1))) val fExpSmall = withMinus(List(b2f(0x00800000), b2f(0x00800000+1), b2f(0x00800000 + 2))) val fExpNormal = withMinus(List(b2f(0x3f800000-2), b2f(0x3f800000-1), b2f(0x3f800000), b2f(0x3f800000+1), b2f(0x3f800000+2))) val fExpBig = withMinus(List(b2f(0x7f7fffff-2), b2f(0x7f7fffff-1), b2f(0x7f7fffff))) @@ -533,13 +545,6 @@ class FpuTest extends FunSuite{ - testMul(1.2f, 0f) - for(a <- fAll; _ <- 0 until 50) testMul(a, randomFloat()) - for(b <- fAll; _ <- 0 until 50) testMul(randomFloat(), b) - for(a <- fAll; b <- fAll) testMul(a, b) - for(_ <- 0 until 1000) testMul(randomFloat(), randomFloat()) - - testAdd(b2f(0x3f800000), b2f(0x3f800000-1)) testAdd(1.1f, 2.3f) testAdd(1.2f, -1.2f) @@ -555,6 +560,28 @@ class FpuTest extends FunSuite{ for(a <- fAll; b <- fAll) testAdd(a, b) for(_ <- 0 until 1000) testAdd(randomFloat(), randomFloat()) + testLoadStore(1.2f) + testMul(1.2f, 2.5f) + testMul(b2f(0x00400000), 16.0f) + testMul(b2f(0x00100000), 16.0f) + testMul(b2f(0x00180000), 16.0f) + testMul(b2f(0x00000004), 16.0f) + testMul(b2f(0x00000040), 16.0f) + testMul(b2f(0x00000041), 16.0f) + testMul(b2f(0x00000001), b2f(0x00000001)) + testMul(1.0f, b2f(0x00000001)) + testMul(0.5f, b2f(0x00000001)) + +// dut.clockDomain.waitSampling(1000) +// simSuccess() + + testMul(1.2f, 0f) + for(a <- fAll; _ <- 0 until 50) testMul(a, randomFloat()) + for(b <- fAll; _ <- 0 until 50) testMul(randomFloat(), b) + for(a <- fAll; b <- fAll) testMul(a, b) + for(_ <- 0 until 1000) testMul(randomFloat(), randomFloat()) + +