fpu vex i2f works

This commit is contained in:
Dolu1990 2021-01-18 17:18:01 +01:00
parent d4b877d415
commit d7220031d4
6 changed files with 210 additions and 97 deletions

View file

@ -32,6 +32,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val opcode = p.Opcode()
val rs1, rs2, rs3 = p.rfAddress()
val rd = p.rfAddress()
val value = Bits(32 bits)
}
case class RfReadOutput() extends Bundle{
@ -40,6 +41,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val lockId = lockIdType()
val rs1, rs2, rs3 = p.internalFloating()
val rd = p.rfAddress()
val value = Bits(32 bits)
}
@ -67,6 +69,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val minus = Bool()
}
case class DivSqrtInput() extends Bundle{
val source = Source()
val rs1, rs2 = p.internalFloating()
@ -75,6 +78,14 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val div = Bool()
}
// Payload of the stream that carries an integer-to-float (I2F) conversion request
// from the decode logic to the i2f conversion stage.
case class I2fInput() extends Bundle{
// Source descriptor of the request (Source is declared elsewhere in FpuCore).
val source = Source()
// Register-file address the converted value is written to.
val rd = p.rfAddress()
// Register-file lock identifier that travels with the request.
val lockId = lockIdType()
// Raw 32-bit integer operand to convert.
val value = Bits(32 bits)
}
case class AddInput() extends Bundle{
val source = Source()
val rs1, rs2 = p.internalFloating()
@ -223,6 +234,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
output.source := s1.source
output.opcode := s1.opcode
output.lockId := s1LockId
output.value := s1.value
output.rd := s1.rd
output.rs1 := rf.ram.readSync(s0.source @@ s0.rs1,enable = !output.isStall)
output.rs2 := rf.ram.readSync(s0.source @@ s0.rs2,enable = !output.isStall)
@ -251,6 +263,17 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
coreRsp.rs1 := read.output.rs1
coreRsp.rs2 := read.output.rs2
val i2fHit = input.opcode === p.Opcode.I2F
val i2f = Stream(I2fInput())
i2f.valid := input.valid && i2fHit
input.ready setWhen(i2fHit && i2f.ready)
i2f.source := read.output.source
i2f.rd := read.output.rd
i2f.value := read.output.value
i2f.lockId := read.output.lockId
val divSqrtHit = input.opcode === p.Opcode.DIV || input.opcode === p.Opcode.SQRT
val divSqrt = Stream(DivSqrtInput())
input.ready setWhen(divSqrtHit && divSqrt.ready)
@ -296,6 +319,23 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
}
}
// Integer-to-float conversion stage: turns the 32-bit integer carried by decode.i2f
// into a WriteInput payload (sign / exponent / mantissa).
val i2f = new Area{
// stage() inserts a register stage on the decode.i2f stream.
val input = decode.i2f.stage()
// Same stream handshake as `input`, but with a WriteInput payload that is driven below.
val output = input.swapPayload(WriteInput())
// Index of the most significant set bit of the operand
// (OHMasking.last keeps only the last set bit, OHToUInt turns that one-hot value into its index).
// NOTE(review): when value == 0 no bit is set, so iLog2 is presumably 0 and the result would
// encode exponentOne with a zero mantissa instead of zero - confirm how a zero operand must be handled.
val iLog2 = OHToUInt(OHMasking.last(input.value))
// Align the operand so that its leading one ends up at bit position p.internalMantissaSize.
val shifted = (input.value << p.internalMantissaSize) >> iLog2
output.source := input.source
output.lockId := input.lockId
output.rd := input.rd
// The sign is always cleared: the operand is treated as an unsigned integer.
output.value.sign := False
// exponentOne is defined outside this block; presumably the encoding of exponent 0 - TODO confirm.
output.value.exponent := iLog2 +^ exponentOne
// Resized to the mantissa width; presumably this drops the leading one (implicit bit) - verify against the mantissa width.
output.value.mantissa := U(shifted).resized
}
val load = new Area{
val input = decode.load.stage()
val filtred = commitFork.load.map(port => port.takeWhen(port.load))
@ -610,7 +650,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val write = new Area{
val arbitrated = StreamArbiterFactory.lowerFirst.noLock.on(List(load.output, add.output, mul.output))
val arbitrated = StreamArbiterFactory.lowerFirst.noLock.on(List(load.output, add.output, mul.output, i2f.output))
val isCommited = rf.lock.map(_.commited).read(arbitrated.lockId)
val commited = arbitrated.haltWhen(!isCommited).toFlow

View file

@ -27,6 +27,7 @@ class FpuPlugin(externalFpu : Boolean = false,
decoderService.add(List(
FADD_S -> List(FPU_ENABLE -> True, FPU_OPCODE -> FpuOpcode.ADD, FPU_COMMIT -> True, FPU_ALU -> True , FPU_LOAD -> False, FPU_RSP -> False),
FLW -> List(FPU_ENABLE -> True, FPU_OPCODE -> FpuOpcode.LOAD, FPU_COMMIT -> True, FPU_ALU -> False, FPU_LOAD -> True , FPU_RSP -> False),
FCVT_S_WU -> List(FPU_ENABLE -> True, FPU_OPCODE -> FpuOpcode.I2F , FPU_COMMIT -> True , FPU_ALU -> True, FPU_LOAD -> False, FPU_RSP -> False, RS1_USE -> True),
FSW -> List(FPU_ENABLE -> True, FPU_OPCODE -> FpuOpcode.STORE, FPU_COMMIT -> False, FPU_ALU -> False, FPU_LOAD -> False, FPU_RSP -> True),
FCVT_WU_S -> List(FPU_ENABLE -> True, FPU_OPCODE -> FpuOpcode.F2I , FPU_COMMIT -> False, FPU_ALU -> False, FPU_LOAD -> False, FPU_RSP -> True, REGFILE_WRITE_VALID -> True, BYPASSABLE_EXECUTE_STAGE -> False, BYPASSABLE_MEMORY_STAGE -> False),
FLE_S -> List(FPU_ENABLE -> True, FPU_OPCODE -> FpuOpcode.CMP , FPU_COMMIT -> False, FPU_ALU -> False, FPU_LOAD -> False, FPU_RSP -> True, REGFILE_WRITE_VALID -> True, BYPASSABLE_EXECUTE_STAGE -> False, BYPASSABLE_MEMORY_STAGE -> False)
@ -58,10 +59,15 @@ class FpuPlugin(externalFpu : Boolean = false,
//It might be better not to fork before fire, to avoid RF stalls on commits
val forked = Reg(Bool) setWhen(port.cmd.fire) clearWhen(!arbitration.isStuck) init(False)
val i2fReady = Reg(Bool()) setWhen(!arbitration.isStuckByOthers) clearWhen(!arbitration.isStuck)
val i2fHazard = input(FPU_OPCODE) === FpuOpcode.I2F && !i2fReady
arbitration.haltItself setWhen(arbitration.isValid && i2fHazard)
arbitration.haltItself setWhen(port.cmd.isStall)
port.cmd.valid := arbitration.isValid && input(FPU_ENABLE) && !forked
port.cmd.valid := arbitration.isValid && input(FPU_ENABLE) && !forked && !i2fHazard
port.cmd.opcode := input(FPU_OPCODE)
port.cmd.value := output(RS1)
port.cmd.value := RegNext(output(RS1))
port.cmd.function := 0
port.cmd.rs1 := input(INSTRUCTION)(rs1Range).asUInt
port.cmd.rs2 := input(INSTRUCTION)(rs2Range).asUInt

View file

@ -25,7 +25,7 @@ Disassembly of section .crt_section:
80000040 <test2>:
80000040: 00200e13 li t3,2
80000044: 00000097 auipc ra,0x0
80000048: 2900a083 lw ra,656(ra) # 800002d4 <test1_data>
80000048: 2e80a083 lw ra,744(ra) # 8000032c <test1_data>
8000004c: 00107153 fadd.s ft2,ft0,ft1
80000050: 00000013 nop
80000054: 00000013 nop
@ -67,7 +67,7 @@ Disassembly of section .crt_section:
800000cc: 00000013 nop
800000d0: 00000013 nop
800000d4: 00000097 auipc ra,0x0
800000d8: 20008093 addi ra,ra,512 # 800002d4 <test1_data>
800000d8: 25808093 addi ra,ra,600 # 8000032c <test1_data>
800000dc: 0000a107 flw ft2,0(ra)
800000e0: 00000013 nop
800000e4: 00000013 nop
@ -85,9 +85,9 @@ Disassembly of section .crt_section:
8000010c: 00000013 nop
80000110: 00000013 nop
80000114: 00000097 auipc ra,0x0
80000118: 1c008093 addi ra,ra,448 # 800002d4 <test1_data>
80000118: 21808093 addi ra,ra,536 # 8000032c <test1_data>
8000011c: 00000117 auipc sp,0x0
80000120: 1bc10113 addi sp,sp,444 # 800002d8 <test2_data>
80000120: 21410113 addi sp,sp,532 # 80000330 <test2_data>
80000124: 0000a087 flw ft1,0(ra)
80000128: 00012107 flw ft2,0(sp)
8000012c: 0020f1d3 fadd.s ft3,ft1,ft2
@ -119,7 +119,7 @@ Disassembly of section .crt_section:
8000018c: 00000013 nop
80000190: 00000013 nop
80000194: 00000097 auipc ra,0x0
80000198: 14808093 addi ra,ra,328 # 800002dc <test3_data>
80000198: 1a008093 addi ra,ra,416 # 80000334 <test3_data>
8000019c: 0030a027 fsw ft3,0(ra)
800001a0: 00000013 nop
800001a4: 00000013 nop
@ -133,13 +133,13 @@ Disassembly of section .crt_section:
800001c0 <test7>:
800001c0: 00700e13 li t3,7
800001c4: 00000097 auipc ra,0x0
800001c8: 11808093 addi ra,ra,280 # 800002dc <test3_data>
800001c8: 17008093 addi ra,ra,368 # 80000334 <test3_data>
800001cc: 00000117 auipc sp,0x0
800001d0: 11410113 addi sp,sp,276 # 800002e0 <test4_data>
800001d0: 16c10113 addi sp,sp,364 # 80000338 <test4_data>
800001d4: 00000197 auipc gp,0x0
800001d8: 11018193 addi gp,gp,272 # 800002e4 <test5_data>
800001d8: 16818193 addi gp,gp,360 # 8000033c <test5_data>
800001dc: 00000217 auipc tp,0x0
800001e0: 10c20213 addi tp,tp,268 # 800002e8 <test6_data>
800001e0: 16420213 addi tp,tp,356 # 80000340 <test6_data>
800001e4: 0000a207 flw ft4,0(ra)
800001e8: 00427253 fadd.s ft4,ft4,ft4
800001ec: 0040f2d3 fadd.s ft5,ft1,ft4
@ -191,74 +191,86 @@ Disassembly of section .crt_section:
80000294: 00000013 nop
80000298: 00000013 nop
8000029c: 00000013 nop
800002a0: 0100006f j 800002b0 <pass>
800002a4 <fail>:
800002a4: f0100137 lui sp,0xf0100
800002a8: f2410113 addi sp,sp,-220 # f00fff24 <test14_data+0x700ffc1c>
800002ac: 01c12023 sw t3,0(sp)
800002b0 <pass>:
800002b0: f0100137 lui sp,0xf0100
800002b4: f2010113 addi sp,sp,-224 # f00fff20 <test14_data+0x700ffc18>
800002b8: 00012023 sw zero,0(sp)
800002a0: 0200006f j 800002c0 <test10>
800002a4: 00000013 nop
800002a8: 00000013 nop
800002ac: 00000013 nop
800002b0: 00000013 nop
800002b4: 00000013 nop
800002b8: 00000013 nop
800002bc: 00000013 nop
800002c0: 00000013 nop
800002c4: 00000013 nop
800002c8: 00000013 nop
800002cc: 00000013 nop
800002d0: 00000013 nop
800002d4 <test1_data>:
800002d4: 0000 unimp
800002d6: 3fc0 fld fs0,184(a5)
800002c0 <test10>:
800002c0: 00a00e13 li t3,10
800002c4: 01000093 li ra,16
800002c8: d010f0d3 fcvt.s.wu ft1,ra
800002cc: 01200113 li sp,18
800002d0: 20000193 li gp,512
800002d4: d0117153 fcvt.s.wu ft2,sp
800002d8: d011f1d3 fcvt.s.wu ft3,gp
800002dc: 00000217 auipc tp,0x0
800002e0: 0a422203 lw tp,164(tp) # 80000380 <test10_data>
800002e4: d01272d3 fcvt.s.wu ft5,tp
800002e8: 00000013 nop
800002ec: 00000013 nop
800002f0: 00000013 nop
800002f4: 00000013 nop
800002f8: 0100006f j 80000308 <pass>
800002d8 <test2_data>:
800002d8: 0000 unimp
800002da: 40a0 lw s0,64(s1)
800002fc <fail>:
800002fc: f0100137 lui sp,0xf0100
80000300: f2410113 addi sp,sp,-220 # f00fff24 <test10_data+0x700ffba4>
80000304: 01c12023 sw t3,0(sp)
800002dc <test3_data>:
800002dc: 0049 c.nop 18
80000308 <pass>:
80000308: f0100137 lui sp,0xf0100
8000030c: f2010113 addi sp,sp,-224 # f00fff20 <test10_data+0x700ffba0>
80000310: 00012023 sw zero,0(sp)
80000314: 00000013 nop
80000318: 00000013 nop
8000031c: 00000013 nop
80000320: 00000013 nop
80000324: 00000013 nop
80000328: 00000013 nop
8000032c <test1_data>:
8000032c: 0000 unimp
8000032e: 3fc0 fld fs0,184(a5)
80000330 <test2_data>:
80000330: 0000 unimp
80000332: 40a0 lw s0,64(s1)
80000334 <test3_data>:
80000334: 0049 c.nop 18
...
800002e0 <test4_data>:
800002e0: 003a c.slli zero,0xe
80000338 <test4_data>:
80000338: 003a c.slli zero,0xe
...
800002e4 <test5_data>:
800002e4: 0038 addi a4,sp,8
8000033c <test5_data>:
8000033c: 0038 addi a4,sp,8
...
800002e8 <test6_data>:
800002e8: 0000004b fnmsub.s ft0,ft0,ft0,ft0,rne
80000340 <test6_data>:
80000340: 0000004b fnmsub.s ft0,ft0,ft0,ft0,rne
80000344: 00000013 nop
80000348: 00000013 nop
8000034c: 00000013 nop
80000350: 00000013 nop
80000354: 00000013 nop
80000358: 00000013 nop
8000035c: 00000013 nop
80000360: 00000013 nop
80000364: 00000013 nop
80000368: 00000013 nop
8000036c: 00000013 nop
80000370: 00000013 nop
80000374: 00000013 nop
80000378: 00000013 nop
8000037c: 00000013 nop
800002ec <test7_data>:
800002ec: 0038 addi a4,sp,8
...
800002f0 <test8_data>:
800002f0: 00000053 fadd.s ft0,ft0,ft0,rne
800002f4 <test9_data>:
800002f4: 0021 c.nop 8
...
800002f8 <test10_data>:
800002f8: ffffffbf 0xffffffbf
800002fc <test11_data>:
800002fc: ffa9 bnez a5,80000256 <test8+0x16>
800002fe: ffff 0xffff
80000300 <test12_data>:
80000300: ffc9 bnez a5,8000029a <test9+0x1a>
80000302: ffff 0xffff
80000304 <test13_data>:
80000304: 0004 0x4
80000306: ffff 0xffff
80000308 <test14_data>:
80000308: 0005 c.nop 1
8000030a: ffff 0xffff
80000380 <test10_data>:
80000380: 01d4 addi a3,sp,196
...

View file

@ -3,7 +3,7 @@
:1000100013000000537110001300000013000000D3
:1000200013000000130000006F00800113000000A7
:100030001300000013000000130000001300000074
:10004000130E20009700000083A0002953711000B8
:10004000130E20009700000083A0802E5371100033
:100050001300000013000000130000001300000054
:100060006F000002130000001300000013000000E6
:100070001300000013000000130000001300000034
@ -12,24 +12,24 @@
:1000A0001300000013000000130000001300000004
:1000B00013000000130000001300000013000000F4
:1000C000130E400013000000130000001300000096
:1000D00013000000970000009380002007A100009B
:1000D00013000000970000009380802507A1000016
:1000E00013000000130000001300000013000000C4
:1000F0006F00000113000000130000001300000057
:10010000130E500013000000130000001300000045
:1001100013000000970000009380001C17010000EE
:100120001301C11B87A0000007210100D3F12000AB
:100110001300000097000000938080211701000069
:100120001301412187A0000007210100D3F1200025
:100130001300000013000000130000001300000073
:100140006F00000413000000130000001300000003
:100150001300000013000000130000001300000053
:100160001300000013000000130000001300000043
:100170001300000013000000130000001300000033
:10018000130E6000130000001300000013000000B5
:1001900013000000970000009380801427A0300017
:1001900013000000970000009380001A27A0300091
:1001A0001300000013000000130000001300000003
:1001B0006F00000113000000130000001300000096
:1001C000130E70009700000093808011170100004B
:1001D00013014111970100009381011117020000E2
:1001E0001302C21007A2000053724200D3F2400073
:1001C000130E7000970000009380001717010000C5
:1001D0001301C116970100009381811617020000D8
:1001E0001302421607A2000053724200D3F24000ED
:1001F0002720410027A051002720120013000000F3
:100200001300000013000000130000006F00400303
:100210001300000013000000130000001300000092
@ -41,12 +41,22 @@
:100270001300000013000000130000001300000032
:10028000130E9000D30031A0538121A0D38131A05F
:100290001300000013000000130000001300000012
:1002A0006F000001370110F0130141F22320C1015A
:1002B000370110F0130101F22320010013000000A8
:1002C00013000000130000001300000013000000E2
:1002D000130000000000C03F0000A04049000000E3
:1002E0003A000000380000004B0000003800000019
:1002F0005300000021000000BFFFFFFFA9FFFFFF28
:10030000C9FFFFFF0400FFFF0500FFFF0000000022
:080310000000000000000000E5
:1002A0006F000002130000001300000013000000A4
:1002B00013000000130000001300000013000000F2
:1002C000130EA00093000001D3F010D01301200101
:1002D00093010020537111D0D3F111D01702000007
:1002E0000322420AD37212D0130000001300000050
:1002F00013000000130000006F000001370110F030
:10030000130141F22320C101370110F0130101F262
:100310002320010013000000130000001300000060
:100320001300000013000000130000000000C03F95
:100330000000A040490000003A0000003800000022
:100340004B00000013000000130000001300000029
:100350001300000013000000130000001300000051
:100360001300000013000000130000001300000041
:100370001300000013000000130000001300000031
:10038000D401000000000000000000000000000098
:10039000000000000000000000000000000000005D
:1003A000000000000000000000000000000000004D
:0803B000000000000000000045
:00000001FF

View file

@ -136,6 +136,23 @@ test9:
nop
nop
nop
j test10
.align 6
/* test10: issue fcvt.s.wu (unsigned integer to single-precision float) on several operands.
   No result is compared against an expected value inside this block. */
test10:
li TEST_ID, 10          /* record the number of the running test */
li x1, 16
fcvt.s.wu f1, x1        /* f1 <- float(16) */
li x2, 18
li x3, 512
fcvt.s.wu f2, x2        /* f2 <- float(18) */
fcvt.s.wu f3, x3        /* f3 <- float(512), issued right after the previous conversion */
lw x4, test10_data      /* operand loaded from memory (test10_data is .word 468) */
fcvt.s.wu f5, x4        /* conversion issued right after the load that produces its operand */
/* presumably padding that lets the FPU pipeline drain before the next test - TODO confirm */
nop
nop
nop
nop
/* la x1, test1_data
li x2, 45
@ -172,11 +189,5 @@ test3_data: .word 73
test4_data: .word 58
test5_data: .word 56
test6_data: .word 75
test7_data: .word 56
test8_data: .word 83
test9_data: .word 33
test10_data: .word -65
test11_data: .word -87
test12_data: .word -55
test13_data: .word 0xFFFF0004
test14_data: .word 0xFFFF0005
.align 6
test10_data: .word 468

View file

@ -192,6 +192,21 @@ class FpuTest extends FunSuite{
}
rspQueue += body
}
/**
 * Queues one integer-to-float (I2F) command together with its commit.
 *
 * @param rd    destination register index driven on the command
 * @param value integer operand driven on the command's `value` field
 */
def i2f(rd : Int, value : Int): Unit = {
  // Command driver: opcode and operand first, then the unused source registers, then rd.
  cmdQueue += { c =>
    c.opcode #= c.opcode.spinalEnum.I2F
    c.value #= value
    // rs1, rs2 and rs3 are randomized, in that order.
    Seq(c.rs1, c.rs2, c.rs3).foreach(_.randomize())
    c.rd #= rd
  }
  // Commit driver: the result is written, and it is not a load.
  commitQueue += { commit =>
    commit.write #= true
    commit.load #= false
  }
}
}
@ -318,6 +333,17 @@ class FpuTest extends FunSuite{
}
}
/**
 * Issues an I2F conversion of `a` into a randomly chosen destination register, then
 * stores that register back and asserts that the value handed to the storeFloat
 * callback equals `a`.
 *
 * NOTE(review): the reference is the signed Int `a` while the conversion stage always
 * clears the sign; the current callers only pass positive values - confirm the expected
 * reference before adding negative operands.
 *
 * @param a integer operand to convert
 */
def testI2f(a : Int): Unit = {
  // No register allocator is needed here: the only register used is the random destination.
  val rd = Random.nextInt(32)
  i2f(rd, a)
  storeFloat(rd){ v =>
    val ref = a
    println(f"i2f($a) = $v, $ref")
    assert(v === ref)
  }
}
def testCmp(a : Float, b : Float): Unit ={
val rs = new RegAllocator()
val rs1, rs2, rs3 = rs.allocate()
@ -334,6 +360,15 @@ class FpuTest extends FunSuite{
val b2f = lang.Float.intBitsToFloat(_)
//TODO Test corner cases
testI2f(17)
testI2f(12)
testI2f(512)
testI2f(1)
// dut.clockDomain.waitSampling(1000)
// simFailure()
//TODO Test corner cases
testCmp(1.0f, 2.0f)
testCmp(1.5f, 2.0f)
@ -349,8 +384,7 @@ class FpuTest extends FunSuite{
testF2i(18.0f)
testF2i(1200.0f)
testF2i(1.0f)
// dut.clockDomain.waitSampling(1000)
// simFailure()
testAdd(0.1f, 1.6f)