diff --git a/src/main/scala/vexriscv/ip/fpu/FpuCore.scala b/src/main/scala/vexriscv/ip/fpu/FpuCore.scala
index 33a20fb..baa0bdd 100644
--- a/src/main/scala/vexriscv/ip/fpu/FpuCore.scala
+++ b/src/main/scala/vexriscv/ip/fpu/FpuCore.scala
@@ -47,6 +47,8 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
     val source = Source()
     val rd = p.rfAddress()
     val lockId = lockIdType()
+    val i2f = Bool() // true for I2F commands; the load path also carries LOAD and FMV_W_X
+    val arg = Bits(2 bits) // opcode argument; bit 0 is tested in the load pipeline before negating an integer whose MSB is set
   }
 
   case class ShortPipInput() extends Bundle{
@@ -228,13 +230,14 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
     val input = read.output.combStage()
     input.ready := False
 
-    val loadHit = List(FpuOpcode.LOAD, FpuOpcode.FMV_W_X).map(input.opcode === _).orR
+    val loadHit = List(FpuOpcode.LOAD, FpuOpcode.FMV_W_X, FpuOpcode.I2F).map(input.opcode === _).orR
     val load = Stream(LoadInput())
     load.valid := input.valid && loadHit
     input.ready setWhen(loadHit && load.ready)
     load.payload.assignSomeByName(read.output.payload)
+    load.i2f := input.opcode === FpuOpcode.I2F
 
-    val shortPipHit = List(FpuOpcode.STORE, FpuOpcode.F2I, FpuOpcode.CMP, FpuOpcode.I2F, FpuOpcode.MIN_MAX, FpuOpcode.SGNJ, FpuOpcode.FMV_X_W, FpuOpcode.FCLASS).map(input.opcode === _).orR
+    val shortPipHit = List(FpuOpcode.STORE, FpuOpcode.F2I, FpuOpcode.CMP, FpuOpcode.MIN_MAX, FpuOpcode.SGNJ, FpuOpcode.FMV_X_W, FpuOpcode.FCLASS).map(input.opcode === _).orR
     val shortPip = Stream(ShortPipInput())
     input.ready setWhen(shortPipHit && shortPip.ready)
     shortPip.valid := input.valid && shortPipHit
@@ -289,7 +292,9 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
       val source = Source()
       val lockId = lockIdType()
       val rd = p.rfAddress()
-      val value = FpuFloat(exponentSize = p.internalExponentSize-1, mantissaSize = p.internalMantissaSize)
+      val value = p.storeLoadType() // raw word taken from feed.value; field extraction happens in s1
+      val i2f = Bool() // I2F marker forwarded from the stage input
+      val arg = Bits(2 bits) // opcode argument forwarded from the stage input
     }
 
     val s0 = new Area{
@@ -304,18 +309,28 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
       output.source := input.source
       output.lockId := input.lockId
       output.rd := input.rd
-      output.value.mantissa := feed.value(0, 23 bits).asUInt
-      output.value.exponent := feed.value(23, 8 bits).asUInt
-      output.value.sign := feed.value(31)
+      output.value := feed.value
+      output.i2f := input.i2f
+      output.arg := input.arg
     }
 
+
+//    val i2fSign = input.arg(0) && input.value.msb
+//    val i2fUnsigned = input.value.asUInt.twoComplement(i2fSign).resize(32 bits)
+//    val i2fLog2 = OHToUInt(OHMasking.last(i2fUnsigned))
+//    val i2fShifted = (i2fUnsigned << p.internalMantissaSize) >> i2fLog2
+//    rfOutput.value.sign := i2fSign
+//    rfOutput.value.exponent := i2fLog2 +^ exponentOne
+//    rfOutput.value.mantissa := U(i2fShifted).resized
+//    rfOutput.value.special := False //TODO
+
     val s1 = new Area{
       val input = s0.output.stage()
       val busy = False
 
-      val f32Mantissa = input.value.mantissa
-      val f32Exponent = input.value.exponent
-      val f32Sign     = input.value.sign
+      val f32Mantissa = input.value(0, 23 bits).asUInt // bits 22..0 of the raw word
+      val f32Exponent = input.value(23, 8 bits).asUInt // bits 30..23 of the raw word
+      val f32Sign     = input.value(31) // bit 31 of the raw word
 
       val expZero = f32Exponent === 0
       val expOne =  f32Exponent === 255
@@ -329,18 +344,31 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
       val isNan       =  expOne  && !manZero
       val isQuiet     = f32Mantissa.msb
 
-      val subnormal = new Area{
+      val fsm = new Area{
         val manTop = Reg(UInt(log2Up(p.internalMantissaSize) bits))
-        val shift = isSubnormal ? manTop | U(0)
+        val shift =  CombInit(manTop)
         val counter = Reg(UInt(log2Up(p.internalMantissaSize+1) bits))
-        val done, boot = Reg(Bool())
-        when(isSubnormal && !done){
+        val done, boot, patched = Reg(Bool()) // patched: set once a negative signed I2F operand has been negated in place
+        val ohInput = CombInit(input.value(0, 32 max p.internalMantissaSize bits)) // word searched for its leading one
+        when(!input.i2f) { ohInput(9, 23 bits) := input.value(0, 23 bits) } // float load: place the 23-bit mantissa in bits 31..9
+        val i2fZero = Reg(Bool()) // latched in the boot step: the low 32 bits of the operand are all zero
+        when(input.valid && (input.i2f || isSubnormal) && !done){
           busy := True
           when(boot){
-            manTop := OHToUInt(OHMasking.first((f32Mantissa).reversed))
-            boot := False
+            when(input.i2f && !patched && input.value.msb && input.arg(0)){ // I2F, arg bit 0 set, MSB set, not yet negated
+              input.value.getDrivingReg(0, 32 bits) := B(input.value.asUInt.twoComplement(True).resize(32 bits)) // overwrite the driving register with the two's complement of the word
+              patched := True // remembered so this branch runs once; also read later as the I2F sign
+            } otherwise {
+              manTop := OHToUInt(OHMasking.first((ohInput).reversed)) // index of the first set bit counted from the MSB side
+              boot := False // next cycles take the shifting branch
+              i2fZero := input.value(31 downto 0) === 0 // record whether the (possibly negated) operand is zero
+            }
           } otherwise {
-            input.value.mantissa.getDrivingReg := input.value.mantissa |<< 1
+            when(input.i2f){ // integer operand: shift the whole 32-bit word
+              input.value.getDrivingReg(0, 32 bits) := input.value(0, 32 bits) |<< 1 // one position left per cycle, written back to the driving register
+            } otherwise { // float operand: shift only the mantissa field
+              input.value.getDrivingReg(0, 23 bits) := input.value(0, 23 bits) |<< 1 // bits 31..23 (sign, exponent) are left untouched
+            }
             counter := counter + 1
             when(counter === shift) {
               done := True
@@ -358,16 +386,20 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
           counter := 0
           done := False
           boot := True
+          patched := False
         }
       }
 
+
+      val i2fSign = fsm.patched // the operand was negated by the fsm, so the result is negative
+      val i2fShifted = input.value.takeHigh(23) // top 23 bits of the (shifted) integer word, used as mantissa
+
       val recoded = p.internalFloating()
       recoded.mantissa := f32Mantissa
-      recoded.exponent := (f32Exponent -^ subnormal.expOffset + (exponentOne - 127)).resized
+      recoded.exponent := (f32Exponent -^ fsm.expOffset + (exponentOne - 127)).resized
       recoded.sign     := f32Sign
       recoded.setNormal
       when(isZero){recoded.setZero}
-      //when(isSubnormal){recoded.setSubnormal}
       when(isInfinity){recoded.setInfinity}
       when(isNan){recoded.setNan}
 
@@ -376,6 +408,13 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
       output.lockId := input.lockId
       output.rd := input.rd
       output.value := recoded
+      when(input.i2f){ // I2F: replace the recoded float fields with the converted integer
+        output.value.sign := i2fSign
+        output.value.exponent := (U(exponentOne+31) - fsm.manTop).resized // manTop is counted from bit 31, so the leading one sits at 31 - manTop
+        output.value.mantissa := U(i2fShifted)
+        output.value.setNormal // clears any special flag set by the float recoding above
+        when(fsm.i2fZero) { output.value.setZero } // a zero integer is emitted as the zero special value
+      }
     }
   }
 
@@ -401,10 +440,9 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
     val subnormal = new Area{
       val needRecoding = List(FpuOpcode.FMV_X_W, FpuOpcode.STORE).map(_ === input.opcode).orR
       val manTop = Reg(UInt(log2Up(p.internalMantissaSize) bits))
-      val shift = isSubnormal ? manTop | U(0)
       val counter = Reg(UInt(log2Up(p.internalMantissaSize+1) bits))
       val done, boot = Reg(Bool())
-      when(needRecoding && isSubnormal && !done){
+      when(input.valid && needRecoding && isSubnormal && !done){
         halt := True
         when(boot){
           manTop := (U(exponentOne - 127) - recoded.exponent).resized
@@ -412,7 +450,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
         } otherwise {
           recoded.mantissa.getDrivingReg := (U(counter === 0) @@ recoded.mantissa) >> 1
           counter := counter + 1
-          when(counter === shift) {
+          when(counter === manTop) {
             done := True
           }
         }
@@ -450,11 +488,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
     val f2iUnsigned = f2iShifted >> p.internalMantissaSize
     val f2iResult = (f2iUnsigned.twoComplement(input.arg(0) && input.rs1.sign)).asBits.resize(32 bits)
 
-    val i2fSign = input.arg(0) && input.value.msb
-    val i2fUnsigned = input.value.asUInt.twoComplement(i2fSign).resize(32 bits)
-    val i2fLog2 = OHToUInt(OHMasking.last(i2fUnsigned))
-    val i2fShifted = (i2fUnsigned << p.internalMantissaSize) >> i2fLog2
-
     val bothZero = input.rs1.isZero && input.rs2.isZero
     val rs1Equal = input.rs1 === input.rs2
     val rs1AbsSmaller = (input.rs1.exponent @@ input.rs1.mantissa) < (input.rs2.exponent @@ input.rs2.mantissa)
@@ -496,7 +529,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
       is(FpuOpcode.FCLASS)  { result := fclassResult.resized }
     }
 
-    val toFpuRf = List(FpuOpcode.MIN_MAX, FpuOpcode.I2F, FpuOpcode.SGNJ).map(input.opcode === _).orR
+    val toFpuRf = List(FpuOpcode.MIN_MAX, FpuOpcode.SGNJ).map(input.opcode === _).orR
 
     rfOutput.valid := input.valid && toFpuRf && !halt
     rfOutput.source := input.source
@@ -504,12 +537,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
     rfOutput.rd := input.rd
     rfOutput.value.assignDontCare()
     switch(input.opcode){
-      is(FpuOpcode.I2F){
-        rfOutput.value.sign := i2fSign
-        rfOutput.value.exponent := i2fLog2 +^ exponentOne
-        rfOutput.value.mantissa := U(i2fShifted).resized
-        rfOutput.value.special := False //TODO
-      }
       is(FpuOpcode.MIN_MAX){
         rfOutput.value := minMaxResult
       }
@@ -550,7 +577,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
       val exp = math.exp + U(needShift)
       val man = needShift ? mulRounded(1, p.internalMantissaSize bits) | mulRounded(0, p.internalMantissaSize bits)
 
-      val forceZero = input.rs1.isZeroOrSubnormal || input.rs2.isZeroOrSubnormal
+      val forceZero = input.rs1.isZero || input.rs2.isZero
       val forceUnderflow = exp <= exponentOne + exponentOne - 127 - 23  // 0x6A //TODO
       val forceOverflow = exp > exponentOne + exponentOne + 127 || input.rs1.isInfinity || input.rs2.isInfinity
       val forceNan = input.rs1.isNan || input.rs2.isNan || ((input.rs1.isInfinity || input.rs2.isInfinity) && (input.rs1.isZero || input.rs2.isZero))
@@ -717,8 +744,8 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
         decode.divSqrtToMul.rs2.exponent := divExp.value + iterationValue.msb.asUInt
         decode.divSqrtToMul.rs2.mantissa := (iterationValue << 1).resized
         val zero = input.rs2.isInfinity
-        val overflow = input.rs2.isZeroOrSubnormal
-        val nan = input.rs2.isNan || (input.rs1.isZeroOrSubnormal && input.rs2.isZeroOrSubnormal)
+        val overflow = input.rs2.isZero
+        val nan = input.rs2.isNan || (input.rs1.isZero && input.rs2.isZero)
 
         when(nan){
           decode.divSqrtToMul.rs2.setNanQuiet
@@ -785,12 +812,12 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
 
     val shifter = new Area {
       val exp21 = input.rs2.exponent -^ input.rs1.exponent
-      val rs1ExponentBigger = (exp21.msb || input.rs2.isZeroOrSubnormal) && !input.rs1.isZeroOrSubnormal
+      val rs1ExponentBigger = (exp21.msb || input.rs2.isZero) && !input.rs1.isZero
       val rs1ExponentEqual = input.rs1.exponent === input.rs2.exponent
       val rs1MantissaBigger = input.rs1.mantissa > input.rs2.mantissa
-      val absRs1Bigger = ((rs1ExponentBigger || rs1ExponentEqual && rs1MantissaBigger) && !input.rs1.isZeroOrSubnormal || input.rs1.isInfinity) && !input.rs2.isInfinity
+      val absRs1Bigger = ((rs1ExponentBigger || rs1ExponentEqual && rs1MantissaBigger) && !input.rs1.isZero || input.rs1.isInfinity) && !input.rs2.isInfinity
       val shiftBy = rs1ExponentBigger ? (0-exp21) | exp21
-      val passThrough = shiftBy >= p.internalMantissaSize || (input.rs1.isZeroOrSubnormal) || (input.rs2.isZeroOrSubnormal)
+      val passThrough = shiftBy >= p.internalMantissaSize || (input.rs1.isZero) || (input.rs2.isZero)
 
       //Note that rs1ExponentBigger can be replaced by absRs1Bigger bellow to avoid xsigned two complement in math block at expense of combinatorial path
       val xySign = absRs1Bigger ? input.rs1.sign | input.rs2.sign
@@ -827,8 +854,8 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
 //      val mantissaShifted = (xyMantissa |<< shift)
 //      val mantissa = ((xyMantissa ) >> 2) + U(xyMantissa(1))
       val exponent = xyExponent -^ shift + 1
-      xySign clearWhen(input.rs1.isZeroOrSubnormal && input.rs2.isZeroOrSubnormal)
-      val forceZero = xyMantissa === 0 || exponent.msb || (input.rs1.isZeroOrSubnormal && input.rs2.isZeroOrSubnormal)
+      xySign clearWhen(input.rs1.isZero && input.rs2.isZero)
+      val forceZero = xyMantissa === 0 || exponent.msb || (input.rs1.isZero && input.rs2.isZero)
       val forceOverflow = exponent === exponentOne + 128 ||  (input.rs1.isInfinity || input.rs2.isInfinity)
       val forceNan = input.rs1.isNan || input.rs2.isNan || (input.rs1.isInfinity && input.rs2.isInfinity && (input.rs1.sign ^ input.rs2.sign))
     }
@@ -847,7 +874,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
       output.value.setNanQuiet
     } elsewhen(norm.forceZero) {
       output.value.setZero
-      when(norm.xyMantissa === 0 || input.rs1.isZeroOrSubnormal && input.rs2.isZeroOrSubnormal){
+      when(norm.xyMantissa === 0 || input.rs1.isZero && input.rs2.isZero){
         output.value.sign := input.rs1.sign && input.rs2.sign
       }
     } elsewhen(norm.forceOverflow) {
@@ -856,26 +883,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
   }
 
 
-//  val format = new Area{
-//    val input = pipeArbiter.arbitrated.combStage()
-//
-//    val rotate = new Area{
-//      val input = Bits(p.internalMantissaSize bits)
-//      val shift = UInt(log2Up(p.internalMantissaSize) bits)
-//      val output = input.rotateLeft(shift)
-//    }
-//
-//    val decode = new Area{
-//      val sign = input.raw(31)
-//      val exp = input.raw(23, 8 bits).asUInt
-//      val man = input.raw(23, 8 bits).asUInt
-//      val isSubnormal = exp === 0 //zero ?
-//      val manTop = OHToUInt(OHMasking.first((man ## U"1").reversed))
-//      val shift = isSubnormal ? manTop | U(0)
-//      rotate.shift := shift
-//    }
-//  }
-
   val write = new Area{
     val arbitrated = StreamArbiterFactory.lowerFirst.noLock.on(List(load.s1.output, add.output, mul.output, shortPip.rfOutput))
     val isCommited = rf.lock.map(_.commited).read(arbitrated.lockId)
diff --git a/src/main/scala/vexriscv/ip/fpu/Interface.scala b/src/main/scala/vexriscv/ip/fpu/Interface.scala
index e079e3e..82c2ba4 100644
--- a/src/main/scala/vexriscv/ip/fpu/Interface.scala
+++ b/src/main/scala/vexriscv/ip/fpu/Interface.scala
@@ -25,9 +25,8 @@ case class FpuFloatDecoded() extends Bundle{
 
 object FpuFloat{
   val ZERO = 0
-  val SUBNORMAL = 1
-  val INFINITY = 2
-  val NAN = 3
+  val INFINITY = 1 // code compared against / written to exponent(1 downto 0) when `special` is set
+  val NAN = 2 // code compared against / written to exponent(1 downto 0) when `special` is set
 }
 
 case class FpuFloat(exponentSize: Int,
@@ -45,22 +44,17 @@ case class FpuFloat(exponentSize: Int,
     ret
   }
 
-
-  def isZeroOrSubnormal =  special && exponent(1) === False
-
   def isNormal    = !special
-  def isZero      =  special && exponent(1 downto 0) === 0
-  //def isSubnormal =  special && exponent(1 downto 0) === 1
-  def isInfinity  =  special && exponent(1 downto 0) === 2
-  def isNan       =  special && exponent(1 downto 0) === 3
+  def isZero      =  special && exponent(1 downto 0) === FpuFloat.ZERO // special flag set and low exponent bits hold the ZERO code
+  def isInfinity  =  special && exponent(1 downto 0) === FpuFloat.INFINITY // special flag set and low exponent bits hold the INFINITY code
+  def isNan       =  special && exponent(1 downto 0) === FpuFloat.NAN // special flag set and low exponent bits hold the NAN code
   def isQuiet     =  mantissa.msb
 
   def setNormal    =  { special := False }
-  def setZero      =  { special := True; exponent(1 downto 0) := 0 }
-  //def setSubnormal =  { special := True; exponent(1 downto 0) := 1 }
-  def setInfinity  =  { special := True; exponent(1 downto 0) := 2 }
-  def setNan       =  { special := True; exponent(1 downto 0) := 3 }
-  def setNanQuiet  =  { special := True; exponent(1 downto 0) := 3; mantissa.msb := True }
+  def setZero      =  { special := True; exponent(1 downto 0) := FpuFloat.ZERO } // only the flag and the two low exponent bits are driven
+  def setInfinity  =  { special := True; exponent(1 downto 0) := FpuFloat.INFINITY } // only the flag and the two low exponent bits are driven
+  def setNan       =  { special := True; exponent(1 downto 0) := FpuFloat.NAN } // mantissa is left as is
+  def setNanQuiet  =  { special := True; exponent(1 downto 0) := FpuFloat.NAN ; mantissa.msb := True } // same as setNan, plus the mantissa MSB that isQuiet reads
 
   def decode() = {
     val ret = FpuFloatDecoded()
@@ -122,7 +116,6 @@ case class FpuCompletion() extends Bundle{
 
 case class FpuCmd(p : FpuParameter) extends Bundle{
   val opcode = p.Opcode()
-  val value = Bits(32 bits) // Int to float
   val arg = Bits(2 bits) 
   val rs1, rs2, rs3 = p.rfAddress()
   val rd = p.rfAddress()
diff --git a/src/main/scala/vexriscv/plugin/FpuPlugin.scala b/src/main/scala/vexriscv/plugin/FpuPlugin.scala
index 258c029..8f2bfa2 100644
--- a/src/main/scala/vexriscv/plugin/FpuPlugin.scala
+++ b/src/main/scala/vexriscv/plugin/FpuPlugin.scala
@@ -161,15 +161,13 @@ class FpuPlugin(externalFpu : Boolean = false,
       //Maybe it might be better to not fork before fire to avoid RF stall on commits
       val forked = Reg(Bool) setWhen(port.cmd.fire) clearWhen(!arbitration.isStuck) init(False)
 
-      val intRfReady = Reg(Bool()) setWhen(!arbitration.isStuckByOthers) clearWhen(!arbitration.isStuck) //TODO is that still in use ?
-      val hazard = (input(RS1_USE) && !intRfReady) || csr.pendings.msb || csr.csrActive
+      val hazard = csr.pendings.msb || csr.csrActive
 
       arbitration.haltItself setWhen(arbitration.isValid && input(FPU_ENABLE) && hazard)
       arbitration.haltItself setWhen(port.cmd.isStall)
 
       port.cmd.valid    := arbitration.isValid && input(FPU_ENABLE) && !forked && !hazard
       port.cmd.opcode   := input(FPU_OPCODE)
-      port.cmd.value    := RegNext(output(RS1))
       port.cmd.arg      := input(FPU_ARG)
       port.cmd.rs1      := ((input(FPU_OPCODE) === FpuOpcode.STORE) ? input(INSTRUCTION)(rs2Range).asUInt | input(INSTRUCTION)(rs1Range).asUInt)
       port.cmd.rs2      := input(INSTRUCTION)(rs2Range).asUInt
@@ -179,7 +177,7 @@ class FpuPlugin(externalFpu : Boolean = false,
 
       insert(FPU_FORKED) := forked || port.cmd.fire
 
-      insert(FPU_COMMIT_SYNC) := List(FpuOpcode.LOAD, FpuOpcode.FMV_W_X).map(_ === input(FPU_OPCODE)).orR
+      insert(FPU_COMMIT_SYNC) := List(FpuOpcode.LOAD, FpuOpcode.FMV_W_X, FpuOpcode.I2F).map(_ === input(FPU_OPCODE)).orR
       insert(FPU_COMMIT_LOAD) := input(FPU_OPCODE) === FpuOpcode.LOAD
     }
 
diff --git a/src/test/scala/vexriscv/ip/fpu/FpuTest.scala b/src/test/scala/vexriscv/ip/fpu/FpuTest.scala
index e349611..0bfeb4c 100644
--- a/src/test/scala/vexriscv/ip/fpu/FpuTest.scala
+++ b/src/test/scala/vexriscv/ip/fpu/FpuTest.scala
@@ -64,7 +64,6 @@ class FpuTest extends FunSuite{
         def loadRaw(rd : Int, value : BigInt): Unit ={
           cmdQueue += {cmd =>
             cmd.opcode #= cmd.opcode.spinalEnum.LOAD
-            cmd.value.randomize()
             cmd.rs1.randomize()
             cmd.rs2.randomize()
             cmd.rs3.randomize()
@@ -85,7 +84,6 @@ class FpuTest extends FunSuite{
         def storeRaw(rs : Int)(body : FpuRsp => Unit): Unit ={
           cmdQueue += {cmd =>
             cmd.opcode #= cmd.opcode.spinalEnum.STORE
-            cmd.value.randomize()
             cmd.rs1 #= rs
             cmd.rs2.randomize()
             cmd.rs3.randomize()
@@ -103,7 +101,6 @@ class FpuTest extends FunSuite{
         def mul(rd : Int, rs1 : Int, rs2 : Int): Unit ={
           cmdQueue += {cmd =>
             cmd.opcode #= cmd.opcode.spinalEnum.MUL
-            cmd.value.randomize()
             cmd.rs1 #= rs1
             cmd.rs2 #= rs2
             cmd.rs3.randomize()
@@ -119,7 +116,6 @@ class FpuTest extends FunSuite{
         def add(rd : Int, rs1 : Int, rs2 : Int): Unit ={
           cmdQueue += {cmd =>
             cmd.opcode #= cmd.opcode.spinalEnum.ADD
-            cmd.value.randomize()
             cmd.rs1 #= rs1
             cmd.rs2 #= rs2
             cmd.rs3.randomize()
@@ -135,7 +131,6 @@ class FpuTest extends FunSuite{
         def div(rd : Int, rs1 : Int, rs2 : Int): Unit ={
           cmdQueue += {cmd =>
             cmd.opcode #= cmd.opcode.spinalEnum.DIV
-            cmd.value.randomize()
             cmd.rs1 #= rs1
             cmd.rs2 #= rs2
             cmd.rs3.randomize()
@@ -151,7 +146,6 @@ class FpuTest extends FunSuite{
         def sqrt(rd : Int, rs1 : Int): Unit ={
           cmdQueue += {cmd =>
             cmd.opcode #= cmd.opcode.spinalEnum.SQRT
-            cmd.value.randomize()
             cmd.rs1 #= rs1
             cmd.rs2.randomize()
             cmd.rs3.randomize()
@@ -167,7 +161,6 @@ class FpuTest extends FunSuite{
         def fma(rd : Int, rs1 : Int, rs2 : Int, rs3 : Int): Unit ={
           cmdQueue += {cmd =>
             cmd.opcode #= cmd.opcode.spinalEnum.FMA
-            cmd.value.randomize()
             cmd.rs1 #= rs1
             cmd.rs2 #= rs2
             cmd.rs3 #= rs3
@@ -184,7 +177,6 @@ class FpuTest extends FunSuite{
         def cmp(rs1 : Int, rs2 : Int)(body : FpuRsp => Unit): Unit ={
           cmdQueue += {cmd =>
             cmd.opcode #= cmd.opcode.spinalEnum.CMP
-            cmd.value.randomize()
             cmd.rs1 #= rs1
             cmd.rs2 #= rs2
             cmd.rs3.randomize()
@@ -197,7 +189,6 @@ class FpuTest extends FunSuite{
         def f2i(rs1 : Int, signed : Boolean)(body : FpuRsp => Unit): Unit ={
           cmdQueue += {cmd =>
             cmd.opcode #= cmd.opcode.spinalEnum.F2I
-            cmd.value.randomize()
             cmd.rs1 #= rs1
             cmd.rs2.randomize()
             cmd.rs3.randomize()
@@ -210,7 +201,6 @@ class FpuTest extends FunSuite{
         def i2f(rd : Int, value : Int, signed : Boolean): Unit ={
           cmdQueue += {cmd =>
             cmd.opcode #= cmd.opcode.spinalEnum.I2F
-            cmd.value #= value.toLong & 0xFFFFFFFFl
             cmd.rs1.randomize()
             cmd.rs2.randomize()
             cmd.rs3.randomize()
@@ -219,14 +209,14 @@ class FpuTest extends FunSuite{
           }
           commitQueue += {cmd =>
             cmd.write #= true
-            cmd.sync #= false
+            cmd.sync #= true // I2F commits are now marked sync
+            cmd.value #= value.toLong & 0xFFFFFFFFl // integer operand is driven on the commit side, as an unsigned 32-bit pattern
           }
         }
 
         def fmv_x_w(rs1 : Int)(body : FpuRsp => Unit): Unit ={
           cmdQueue += {cmd =>
             cmd.opcode #= cmd.opcode.spinalEnum.FMV_X_W
-            cmd.value.randomize()
             cmd.rs1 #= rs1
             cmd.rs2.randomize()
             cmd.rs3.randomize()
@@ -239,7 +229,6 @@ class FpuTest extends FunSuite{
         def fmv_w_x(rd : Int, value : Int): Unit ={
           cmdQueue += {cmd =>
             cmd.opcode #= cmd.opcode.spinalEnum.FMV_W_X
-            cmd.value.randomize()
             cmd.rs1.randomize()
             cmd.rs2.randomize()
             cmd.rs3.randomize()
@@ -256,7 +245,6 @@ class FpuTest extends FunSuite{
         def min(rd : Int, rs1 : Int, rs2 : Int): Unit ={
           cmdQueue += {cmd =>
             cmd.opcode #= cmd.opcode.spinalEnum.MIN_MAX
-            cmd.value.randomize()
             cmd.rs1 #= rs1
             cmd.rs2 #= rs2
             cmd.rs3.randomize()
@@ -273,7 +261,6 @@ class FpuTest extends FunSuite{
         def sgnj(rd : Int, rs1 : Int, rs2 : Int): Unit ={
           cmdQueue += {cmd =>
             cmd.opcode #= cmd.opcode.spinalEnum.SGNJ
-            cmd.value.randomize()
             cmd.rs1 #= rs1
             cmd.rs2 #= rs2
             cmd.rs3.randomize()
@@ -447,9 +434,11 @@ class FpuTest extends FunSuite{
           val rd = Random.nextInt(32)
           i2f(rd, a, signed)
           storeFloat(rd){v =>
-            val ref = a.toInt
-            println(f"i2f($a) = $v, $ref")
-            assert(v === ref)
+            val aLong = if(signed) a.toLong else a.toLong & 0xFFFFFFFFl // operand value under the requested signedness
+            val ref = aLong.toFloat // software reference; reuses aLong instead of repeating the signed/unsigned widening
+            println(f"i2f($aLong) = $v, $ref")
+            if(ref.abs < (1 << 22)) assert(v === ref) // small magnitudes must match the reference exactly
+            assert(checkFloat(v, ref)) // every result must also pass the suite's checkFloat comparison
           }
         }
 
@@ -542,6 +531,65 @@ class FpuTest extends FunSuite{
         val fNan = List(Float.NaN, b2f(0x7f820000), b2f(0x7fc00000))
         val fAll = fZeros ++ fSubnormals ++ fExpSmall ++ fExpNormal ++ fExpBig ++ fInfinity ++ fNan
 
+        val iSmall = (0 to 20) // small non-negative integers
+        val iBigUnsigned = (0 to 20).map(e => 0xFFFFFFFF - e) // 0xFFFFFFFF is the Int -1, so these are the 21 highest 32-bit patterns
+        val iBigSigned = (0 to 20).map(e => 0x7FFFFFFF - e) ++ (0 to 20).map(e => 0x80000000 + e) // just below Int.MaxValue and just above Int.MinValue
+        val iUnsigned = iSmall ++ iBigUnsigned // vectors fed to testI2f with signed = false
+        val iSigned = iSmall ++ iSmall.map(-_) ++ iBigSigned // vectors fed to testI2f with signed = true
+
+
+        testLoadStore(1.17549435082e-38f)
+        testLoadStore(1.4E-45f)
+        testLoadStore(3.44383110592e-41f)
+
+        testAdd(b2f(0x3f800000), b2f(0x3f800000-1))
+        testAdd(1.1f, 2.3f)
+        testAdd(1.2f, -1.2f)
+        testAdd(-1.2f, 1.2f)
+        testAdd(0.0f, -1.2f)
+        testAdd(-0.0f, -1.2f)
+        testAdd(1.2f, -0f)
+        testAdd(1.2f, 0f)
+        testAdd(1.1f, Float.MinPositiveValue)
+
+        for(a <- fAll; _ <- 0 until 50) testAdd(a, randomFloat())
+        for(b <- fAll; _ <- 0 until 50) testAdd(randomFloat(), b)
+        for(a <- fAll; b <- fAll) testAdd(a, b)
+        for(_ <- 0 until 1000) testAdd(randomFloat(), randomFloat())
+
+
+
+        testLoadStore(1.2f)
+        testMul(1.2f, 2.5f)
+        testMul(b2f(0x00400000), 16.0f)
+        testMul(b2f(0x00100000), 16.0f)
+        testMul(b2f(0x00180000), 16.0f)
+        testMul(b2f(0x00000004), 16.0f)
+        testMul(b2f(0x00000040), 16.0f)
+        testMul(b2f(0x00000041), 16.0f)
+        testMul(b2f(0x00000001), b2f(0x00000001))
+        testMul(1.0f, b2f(0x00000001))
+        testMul(0.5f, b2f(0x00000001))
+
+        //        dut.clockDomain.waitSampling(1000)
+        //        simSuccess()
+
+        testMul(1.2f, 0f)
+        for(a <- fAll; _ <- 0 until 50) testMul(a, randomFloat())
+        for(b <- fAll; _ <- 0 until 50) testMul(randomFloat(), b)
+        for(a <- fAll; b <- fAll) testMul(a, b)
+        for(_ <- 0 until 1000) testMul(randomFloat(), randomFloat())
+
+
+
+        testLoadStore(1.765f)
+        testFmv_w_x(lang.Float.floatToIntBits(7.234f))
+        testI2f(64, false)
+        for(i <- iUnsigned) testI2f(i, false)
+        for(i <- iSigned) testI2f(i, true)
+        for(_ <- 0 until 1000) testI2f(Random.nextInt(), Random.nextBoolean())
+
+
         testCmp(0.0f, 1.2f )
         testCmp(1.2f, 0.0f )
         testCmp(0.0f, -0.0f )
@@ -576,41 +624,6 @@ class FpuTest extends FunSuite{
 
 
 
-        testAdd(b2f(0x3f800000), b2f(0x3f800000-1))
-        testAdd(1.1f, 2.3f)
-        testAdd(1.2f, -1.2f)
-        testAdd(-1.2f, 1.2f)
-        testAdd(0.0f, -1.2f)
-        testAdd(-0.0f, -1.2f)
-        testAdd(1.2f, -0f)
-        testAdd(1.2f, 0f)
-        testAdd(1.1f, Float.MinPositiveValue)
-
-        for(a <- fAll; _ <- 0 until 50) testAdd(a, randomFloat())
-        for(b <- fAll; _ <- 0 until 50) testAdd(randomFloat(), b)
-        for(a <- fAll; b <- fAll) testAdd(a, b)
-        for(_ <- 0 until 1000) testAdd(randomFloat(), randomFloat())
-
-        testLoadStore(1.2f)
-        testMul(1.2f, 2.5f)
-        testMul(b2f(0x00400000), 16.0f)
-        testMul(b2f(0x00100000), 16.0f)
-        testMul(b2f(0x00180000), 16.0f)
-        testMul(b2f(0x00000004), 16.0f)
-        testMul(b2f(0x00000040), 16.0f)
-        testMul(b2f(0x00000041), 16.0f)
-        testMul(b2f(0x00000001), b2f(0x00000001))
-        testMul(1.0f, b2f(0x00000001))
-        testMul(0.5f, b2f(0x00000001))
-
-//        dut.clockDomain.waitSampling(1000)
-//        simSuccess()
-
-        testMul(1.2f, 0f)
-        for(a <- fAll; _ <- 0 until 50) testMul(a, randomFloat())
-        for(b <- fAll; _ <- 0 until 50) testMul(randomFloat(), b)
-        for(a <- fAll; b <- fAll) testMul(a, b)
-        for(_ <- 0 until 1000) testMul(randomFloat(), randomFloat())