diff --git a/src/main/scala/vexriscv/ip/fpu/FpuCore.scala b/src/main/scala/vexriscv/ip/fpu/FpuCore.scala index 5d31fe9..245209e 100644 --- a/src/main/scala/vexriscv/ip/fpu/FpuCore.scala +++ b/src/main/scala/vexriscv/ip/fpu/FpuCore.scala @@ -32,6 +32,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ val opcode = p.Opcode() val rs1, rs2, rs3 = p.rfAddress() val rd = p.rfAddress() + val value = Bits(32 bits) } case class RfReadOutput() extends Bundle{ @@ -40,6 +41,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ val lockId = lockIdType() val rs1, rs2, rs3 = p.internalFloating() val rd = p.rfAddress() + val value = Bits(32 bits) } @@ -67,6 +69,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ val minus = Bool() } + case class DivSqrtInput() extends Bundle{ val source = Source() val rs1, rs2 = p.internalFloating() @@ -75,6 +78,14 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ val div = Bool() } + case class I2fInput() extends Bundle{ + val source = Source() + val rd = p.rfAddress() + val lockId = lockIdType() + val value = Bits(32 bits) + } + + case class AddInput() extends Bundle{ val source = Source() val rs1, rs2 = p.internalFloating() @@ -223,6 +234,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ output.source := s1.source output.opcode := s1.opcode output.lockId := s1LockId + output.value := s1.value output.rd := s1.rd output.rs1 := rf.ram.readSync(s0.source @@ s0.rs1,enable = !output.isStall) output.rs2 := rf.ram.readSync(s0.source @@ s0.rs2,enable = !output.isStall) @@ -251,6 +263,17 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ coreRsp.rs1 := read.output.rs1 coreRsp.rs2 := read.output.rs2 + + val i2fHit = input.opcode === p.Opcode.I2F + val i2f = Stream(I2fInput()) + i2f.valid := input.valid && i2fHit + input.ready setWhen(i2fHit && i2f.ready) + i2f.source := read.output.source + i2f.rd := read.output.rd + i2f.value := read.output.value + i2f.lockId := read.output.lockId + + val divSqrtHit = input.opcode === p.Opcode.DIV || input.opcode === p.Opcode.SQRT val divSqrt = Stream(DivSqrtInput()) input.ready setWhen(divSqrtHit && divSqrt.ready) @@ -296,6 +319,23 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ } } + val i2f = new Area{ + val input = decode.i2f.stage() + val output = input.swapPayload(WriteInput()) + + val iLog2 = OHToUInt(OHMasking.last(input.value)) + val shifted = (input.value << p.internalMantissaSize) >> iLog2 + + output.source := input.source + output.lockId := input.lockId + output.rd := input.rd + output.value.sign := False + output.value.exponent := iLog2 +^ exponentOne + output.value.mantissa := U(shifted).resized + } + + + val load = new Area{ val input = decode.load.stage() val filtred = commitFork.load.map(port => port.takeWhen(port.load)) @@ -610,7 +650,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ val write = new Area{ - val arbitrated = StreamArbiterFactory.lowerFirst.noLock.on(List(load.output, add.output, mul.output)) + val arbitrated = StreamArbiterFactory.lowerFirst.noLock.on(List(load.output, add.output, mul.output, i2f.output)) val isCommited = rf.lock.map(_.commited).read(arbitrated.lockId) val commited = arbitrated.haltWhen(!isCommited).toFlow diff --git a/src/main/scala/vexriscv/plugin/FpuPlugin.scala b/src/main/scala/vexriscv/plugin/FpuPlugin.scala index e61d224..7b55ef3 100644 --- a/src/main/scala/vexriscv/plugin/FpuPlugin.scala +++ b/src/main/scala/vexriscv/plugin/FpuPlugin.scala @@ -27,6 +27,7 @@ class FpuPlugin(externalFpu : Boolean = false, decoderService.add(List( FADD_S -> List(FPU_ENABLE -> True, FPU_OPCODE -> FpuOpcode.ADD, FPU_COMMIT -> True, FPU_ALU -> True , FPU_LOAD -> False, FPU_RSP -> False), FLW -> List(FPU_ENABLE -> True, FPU_OPCODE -> FpuOpcode.LOAD, FPU_COMMIT -> True, FPU_ALU -> False, FPU_LOAD -> True , FPU_RSP -> False), + FCVT_S_WU -> List(FPU_ENABLE -> True, FPU_OPCODE -> FpuOpcode.I2F , FPU_COMMIT -> True , FPU_ALU -> True, FPU_LOAD -> False, FPU_RSP -> False, RS1_USE -> True), FSW -> List(FPU_ENABLE -> True, FPU_OPCODE -> FpuOpcode.STORE, FPU_COMMIT -> False, FPU_ALU -> False, FPU_LOAD -> False, FPU_RSP -> True), FCVT_WU_S -> List(FPU_ENABLE -> True, FPU_OPCODE -> FpuOpcode.F2I , FPU_COMMIT -> False, FPU_ALU -> False, FPU_LOAD -> False, FPU_RSP -> True, REGFILE_WRITE_VALID -> True, BYPASSABLE_EXECUTE_STAGE -> False, BYPASSABLE_MEMORY_STAGE -> False), FLE_S -> List(FPU_ENABLE -> True, FPU_OPCODE -> FpuOpcode.CMP , FPU_COMMIT -> False, FPU_ALU -> False, FPU_LOAD -> False, FPU_RSP -> True, REGFILE_WRITE_VALID -> True, BYPASSABLE_EXECUTE_STAGE -> False, BYPASSABLE_MEMORY_STAGE -> False) @@ -58,10 +59,15 @@ class FpuPlugin(externalFpu : Boolean = false, //Maybe it might be better to not fork before fire to avoid RF stall on commits val forked = Reg(Bool) setWhen(port.cmd.fire) clearWhen(!arbitration.isStuck) init(False) + val i2fReady = Reg(Bool()) setWhen(!arbitration.isStuckByOthers) clearWhen(!arbitration.isStuck) + val i2fHazard = input(FPU_OPCODE) === FpuOpcode.I2F && !i2fReady + + arbitration.haltItself setWhen(arbitration.isValid && i2fHazard) arbitration.haltItself setWhen(port.cmd.isStall) - port.cmd.valid := arbitration.isValid && input(FPU_ENABLE) && !forked + + port.cmd.valid := arbitration.isValid && input(FPU_ENABLE) && !forked && !i2fHazard port.cmd.opcode := input(FPU_OPCODE) - port.cmd.value := output(RS1) + port.cmd.value := RegNext(output(RS1)) port.cmd.function := 0 port.cmd.rs1 := input(INSTRUCTION)(rs1Range).asUInt port.cmd.rs2 := input(INSTRUCTION)(rs2Range).asUInt diff --git a/src/test/cpp/raw/fpu/build/fpu.asm b/src/test/cpp/raw/fpu/build/fpu.asm index f65b2ab..b769972 100644 --- a/src/test/cpp/raw/fpu/build/fpu.asm +++ b/src/test/cpp/raw/fpu/build/fpu.asm @@ -25,7 +25,7 @@ Disassembly of section .crt_section: 80000040 : 80000040: 00200e13 li t3,2 80000044: 00000097 auipc ra,0x0 -80000048: 2900a083 lw ra,656(ra) # 800002d4 +80000048: 2e80a083 lw ra,744(ra) # 8000032c 8000004c: 00107153 fadd.s ft2,ft0,ft1 80000050: 00000013 nop 80000054: 00000013 nop @@ -67,7 +67,7 @@ Disassembly of section .crt_section: 800000cc: 00000013 nop 800000d0: 00000013 nop 800000d4: 00000097 auipc ra,0x0 -800000d8: 20008093 addi ra,ra,512 # 800002d4 +800000d8: 25808093 addi ra,ra,600 # 8000032c 800000dc: 0000a107 flw ft2,0(ra) 800000e0: 00000013 nop 800000e4: 00000013 nop @@ -85,9 +85,9 @@ Disassembly of section .crt_section: 8000010c: 00000013 nop 80000110: 00000013 nop 80000114: 00000097 auipc ra,0x0 -80000118: 1c008093 addi ra,ra,448 # 800002d4 +80000118: 21808093 addi ra,ra,536 # 8000032c 8000011c: 00000117 auipc sp,0x0 -80000120: 1bc10113 addi sp,sp,444 # 800002d8 +80000120: 21410113 addi sp,sp,532 # 80000330 80000124: 0000a087 flw ft1,0(ra) 80000128: 00012107 flw ft2,0(sp) 8000012c: 0020f1d3 fadd.s ft3,ft1,ft2 @@ -119,7 +119,7 @@ Disassembly of section .crt_section: 8000018c: 00000013 nop 80000190: 00000013 nop 80000194: 00000097 auipc ra,0x0 -80000198: 14808093 addi ra,ra,328 # 800002dc +80000198: 1a008093 addi ra,ra,416 # 80000334 8000019c: 0030a027 fsw ft3,0(ra) 800001a0: 00000013 nop 800001a4: 00000013 nop @@ -133,13 +133,13 @@ Disassembly of section .crt_section: 800001c0 : 800001c0: 00700e13 li t3,7 800001c4: 00000097 auipc ra,0x0 -800001c8: 11808093 addi ra,ra,280 # 800002dc +800001c8: 17008093 addi ra,ra,368 # 80000334 800001cc: 00000117 auipc sp,0x0 -800001d0: 11410113 addi sp,sp,276 # 800002e0 +800001d0: 16c10113 addi sp,sp,364 # 80000338 800001d4: 00000197 auipc gp,0x0 -800001d8: 11018193 addi gp,gp,272 # 800002e4 +800001d8: 16818193 addi gp,gp,360 # 8000033c 800001dc: 00000217 auipc tp,0x0 -800001e0: 10c20213 addi tp,tp,268 # 800002e8 +800001e0: 16420213 addi tp,tp,356 # 80000340 800001e4: 0000a207 flw ft4,0(ra) 800001e8: 00427253 fadd.s ft4,ft4,ft4 800001ec: 0040f2d3 fadd.s ft5,ft1,ft4 @@ -191,74 +191,86 @@ Disassembly of section .crt_section: 80000294: 00000013 nop 80000298: 00000013 nop 8000029c: 00000013 nop -800002a0: 0100006f j 800002b0 - -800002a4 : -800002a4: f0100137 lui sp,0xf0100 -800002a8: f2410113 addi sp,sp,-220 # f00fff24 -800002ac: 01c12023 sw t3,0(sp) - -800002b0 : -800002b0: f0100137 lui sp,0xf0100 -800002b4: f2010113 addi sp,sp,-224 # f00fff20 -800002b8: 00012023 sw zero,0(sp) +800002a0: 0200006f j 800002c0 +800002a4: 00000013 nop +800002a8: 00000013 nop +800002ac: 00000013 nop +800002b0: 00000013 nop +800002b4: 00000013 nop +800002b8: 00000013 nop 800002bc: 00000013 nop -800002c0: 00000013 nop -800002c4: 00000013 nop -800002c8: 00000013 nop -800002cc: 00000013 nop -800002d0: 00000013 nop -800002d4 : -800002d4: 0000 unimp -800002d6: 3fc0 fld fs0,184(a5) +800002c0 : +800002c0: 00a00e13 li t3,10 +800002c4: 01000093 li ra,16 +800002c8: d010f0d3 fcvt.s.wu ft1,ra +800002cc: 01200113 li sp,18 +800002d0: 20000193 li gp,512 +800002d4: d0117153 fcvt.s.wu ft2,sp +800002d8: d011f1d3 fcvt.s.wu ft3,gp +800002dc: 00000217 auipc tp,0x0 +800002e0: 0a422203 lw tp,164(tp) # 80000380 +800002e4: d01272d3 fcvt.s.wu ft5,tp +800002e8: 00000013 nop +800002ec: 00000013 nop +800002f0: 00000013 nop +800002f4: 00000013 nop +800002f8: 0100006f j 80000308 -800002d8 : -800002d8: 0000 unimp -800002da: 40a0 lw s0,64(s1) +800002fc : +800002fc: f0100137 lui sp,0xf0100 +80000300: f2410113 addi sp,sp,-220 # f00fff24 +80000304: 01c12023 sw t3,0(sp) -800002dc : -800002dc: 0049 c.nop 18 +80000308 : +80000308: f0100137 lui sp,0xf0100 +8000030c: f2010113 addi sp,sp,-224 # f00fff20 +80000310: 00012023 sw zero,0(sp) +80000314: 00000013 nop +80000318: 00000013 nop +8000031c: 00000013 nop +80000320: 00000013 nop +80000324: 00000013 nop +80000328: 00000013 nop + +8000032c : +8000032c: 0000 unimp +8000032e: 3fc0 fld fs0,184(a5) + +80000330 : +80000330: 0000 unimp +80000332: 40a0 lw s0,64(s1) + +80000334 : +80000334: 0049 c.nop 18 ... -800002e0 : -800002e0: 003a c.slli zero,0xe +80000338 : +80000338: 003a c.slli zero,0xe ... -800002e4 : -800002e4: 0038 addi a4,sp,8 +8000033c : +8000033c: 0038 addi a4,sp,8 ... -800002e8 : -800002e8: 0000004b fnmsub.s ft0,ft0,ft0,ft0,rne +80000340 : +80000340: 0000004b fnmsub.s ft0,ft0,ft0,ft0,rne +80000344: 00000013 nop +80000348: 00000013 nop +8000034c: 00000013 nop +80000350: 00000013 nop +80000354: 00000013 nop +80000358: 00000013 nop +8000035c: 00000013 nop +80000360: 00000013 nop +80000364: 00000013 nop +80000368: 00000013 nop +8000036c: 00000013 nop +80000370: 00000013 nop +80000374: 00000013 nop +80000378: 00000013 nop +8000037c: 00000013 nop -800002ec : -800002ec: 0038 addi a4,sp,8 - ... - -800002f0 : -800002f0: 00000053 fadd.s ft0,ft0,ft0,rne - -800002f4 : -800002f4: 0021 c.nop 8 - ... - -800002f8 : -800002f8: ffffffbf 0xffffffbf - -800002fc : -800002fc: ffa9 bnez a5,80000256 -800002fe: ffff 0xffff - -80000300 : -80000300: ffc9 bnez a5,8000029a -80000302: ffff 0xffff - -80000304 : -80000304: 0004 0x4 -80000306: ffff 0xffff - -80000308 : -80000308: 0005 c.nop 1 -8000030a: ffff 0xffff +80000380 : +80000380: 01d4 addi a3,sp,196 ... diff --git a/src/test/cpp/raw/fpu/build/fpu.hex b/src/test/cpp/raw/fpu/build/fpu.hex index a182b0e..4b83443 100644 --- a/src/test/cpp/raw/fpu/build/fpu.hex +++ b/src/test/cpp/raw/fpu/build/fpu.hex @@ -3,7 +3,7 @@ :1000100013000000537110001300000013000000D3 :1000200013000000130000006F00800113000000A7 :100030001300000013000000130000001300000074 -:10004000130E20009700000083A0002953711000B8 +:10004000130E20009700000083A0802E5371100033 :100050001300000013000000130000001300000054 :100060006F000002130000001300000013000000E6 :100070001300000013000000130000001300000034 @@ -12,24 +12,24 @@ :1000A0001300000013000000130000001300000004 :1000B00013000000130000001300000013000000F4 :1000C000130E400013000000130000001300000096 -:1000D00013000000970000009380002007A100009B +:1000D00013000000970000009380802507A1000016 :1000E00013000000130000001300000013000000C4 :1000F0006F00000113000000130000001300000057 :10010000130E500013000000130000001300000045 -:1001100013000000970000009380001C17010000EE -:100120001301C11B87A0000007210100D3F12000AB +:100110001300000097000000938080211701000069 +:100120001301412187A0000007210100D3F1200025 :100130001300000013000000130000001300000073 :100140006F00000413000000130000001300000003 :100150001300000013000000130000001300000053 :100160001300000013000000130000001300000043 :100170001300000013000000130000001300000033 :10018000130E6000130000001300000013000000B5 -:1001900013000000970000009380801427A0300017 +:1001900013000000970000009380001A27A0300091 :1001A0001300000013000000130000001300000003 :1001B0006F00000113000000130000001300000096 -:1001C000130E70009700000093808011170100004B -:1001D00013014111970100009381011117020000E2 -:1001E0001302C21007A2000053724200D3F2400073 +:1001C000130E7000970000009380001717010000C5 +:1001D0001301C116970100009381811617020000D8 +:1001E0001302421607A2000053724200D3F24000ED :1001F0002720410027A051002720120013000000F3 :100200001300000013000000130000006F00400303 :100210001300000013000000130000001300000092 @@ -41,12 +41,22 @@ :100270001300000013000000130000001300000032 :10028000130E9000D30031A0538121A0D38131A05F :100290001300000013000000130000001300000012 -:1002A0006F000001370110F0130141F22320C1015A -:1002B000370110F0130101F22320010013000000A8 -:1002C00013000000130000001300000013000000E2 -:1002D000130000000000C03F0000A04049000000E3 -:1002E0003A000000380000004B0000003800000019 -:1002F0005300000021000000BFFFFFFFA9FFFFFF28 -:10030000C9FFFFFF0400FFFF0500FFFF0000000022 -:080310000000000000000000E5 +:1002A0006F000002130000001300000013000000A4 +:1002B00013000000130000001300000013000000F2 +:1002C000130EA00093000001D3F010D01301200101 +:1002D00093010020537111D0D3F111D01702000007 +:1002E0000322420AD37212D0130000001300000050 +:1002F00013000000130000006F000001370110F030 +:10030000130141F22320C101370110F0130101F262 +:100310002320010013000000130000001300000060 +:100320001300000013000000130000000000C03F95 +:100330000000A040490000003A0000003800000022 +:100340004B00000013000000130000001300000029 +:100350001300000013000000130000001300000051 +:100360001300000013000000130000001300000041 +:100370001300000013000000130000001300000031 +:10038000D401000000000000000000000000000098 +:10039000000000000000000000000000000000005D +:1003A000000000000000000000000000000000004D +:0803B000000000000000000045 :00000001FF diff --git a/src/test/cpp/raw/fpu/src/crt.S b/src/test/cpp/raw/fpu/src/crt.S index 0a3183f..302b98c 100644 --- a/src/test/cpp/raw/fpu/src/crt.S +++ b/src/test/cpp/raw/fpu/src/crt.S @@ -136,6 +136,23 @@ test9: nop nop nop + j test10 + +.align 6 +test10: + li TEST_ID, 10 + li x1, 16 + fcvt.s.wu f1, x1 + li x2, 18 + li x3, 512 + fcvt.s.wu f2, x2 + fcvt.s.wu f3, x3 + lw x4, test10_data + fcvt.s.wu f5, x4 + nop + nop + nop + nop /* la x1, test1_data li x2, 45 @@ -172,11 +189,5 @@ test3_data: .word 73 test4_data: .word 58 test5_data: .word 56 test6_data: .word 75 -test7_data: .word 56 -test8_data: .word 83 -test9_data: .word 33 -test10_data: .word -65 -test11_data: .word -87 -test12_data: .word -55 -test13_data: .word 0xFFFF0004 -test14_data: .word 0xFFFF0005 \ No newline at end of file +.align 6 +test10_data: .word 468 diff --git a/src/test/scala/vexriscv/ip/fpu/FpuTest.scala b/src/test/scala/vexriscv/ip/fpu/FpuTest.scala index 7b3dafb..b1645a4 100644 --- a/src/test/scala/vexriscv/ip/fpu/FpuTest.scala +++ b/src/test/scala/vexriscv/ip/fpu/FpuTest.scala @@ -192,6 +192,21 @@ class FpuTest extends FunSuite{ } rspQueue += body } + + def i2f(rd : Int, value : Int): Unit ={ + cmdQueue += {cmd => + cmd.opcode #= cmd.opcode.spinalEnum.I2F + cmd.value #= value + cmd.rs1.randomize() + cmd.rs2.randomize() + cmd.rs3.randomize() + cmd.rd #= rd + } + commitQueue += {cmd => + cmd.write #= true + cmd.load #= false + } + } } @@ -318,6 +333,17 @@ class FpuTest extends FunSuite{ } } + def testI2f(a : Int): Unit ={ + val rs = new RegAllocator() + val rd = Random.nextInt(32) + i2f(rd, a) + storeFloat(rd){v => + val ref = a.toInt + println(f"i2f($a) = $v, $ref") + assert(v === ref) + } + } + def testCmp(a : Float, b : Float): Unit ={ val rs = new RegAllocator() val rs1, rs2, rs3 = rs.allocate() @@ -334,6 +360,15 @@ class FpuTest extends FunSuite{ val b2f = lang.Float.intBitsToFloat(_) + + //TODO Test corner cases + testI2f(17) + testI2f(12) + testI2f(512) + testI2f(1) +// dut.clockDomain.waitSampling(1000) +// simFailure() + //TODO Test corner cases testCmp(1.0f, 2.0f) testCmp(1.5f, 2.0f) @@ -349,8 +384,7 @@ class FpuTest extends FunSuite{ testF2i(18.0f) testF2i(1200.0f) testF2i(1.0f) -// dut.clockDomain.waitSampling(1000) -// simFailure() + testAdd(0.1f, 1.6f)