From 8c4fae8bf2eeea749c3c0b8fe5522a6d1f9b37cc Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Tue, 19 Jan 2021 13:27:42 +0100 Subject: [PATCH] fpu add min/sgnj/fmv --- src/main/scala/vexriscv/ip/fpu/FpuCore.scala | 123 +++++++++------ .../scala/vexriscv/ip/fpu/Interface.scala | 2 +- src/test/scala/vexriscv/ip/fpu/FpuTest.scala | 145 ++++++++++++++++++ 3 files changed, 220 insertions(+), 50 deletions(-) diff --git a/src/main/scala/vexriscv/ip/fpu/FpuCore.scala b/src/main/scala/vexriscv/ip/fpu/FpuCore.scala index 245209e..4ef198e 100644 --- a/src/main/scala/vexriscv/ip/fpu/FpuCore.scala +++ b/src/main/scala/vexriscv/ip/fpu/FpuCore.scala @@ -56,6 +56,9 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ val source = Source() val opcode = p.Opcode() val rs1, rs2 = p.internalFloating() + val lockId = lockIdType() + val rd = p.rfAddress() + val value = Bits(32 bits) } case class MulInput() extends Bundle{ @@ -78,13 +81,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ val div = Bool() } - case class I2fInput() extends Bundle{ - val source = Source() - val rd = p.rfAddress() - val lockId = lockIdType() - val value = Bits(32 bits) - } - case class AddInput() extends Bundle{ val source = Source() @@ -203,10 +199,26 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ is(p.Opcode.F2I){ useRs1 := True } + is(p.Opcode.MIN_MAX){ + useRd := True + useRs1 := True + useRs2 := True + } is(p.Opcode.CMP){ useRs1 := True useRs2 := True } + is(p.Opcode.SGNJ){ + useRd := True + useRs1 := True + useRs2 := True + } + is(p.Opcode.FMV_X_W){ + useRs1 := True + } + is(p.Opcode.FMV_W_X){ + useRd := True + } } val hits = List((useRs1, s0.rs1), (useRs2, s0.rs2), (useRs3, s0.rs3), (useRd, s0.rd)).map{case (use, reg) => use && rf.lock.map(l => l.valid && l.source === s0.source && l.address === reg).orR} @@ -254,7 +266,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ load.rs1 := read.output.rs1 load.lockId := read.output.lockId - val coreRspHit = List(FpuOpcode.STORE, FpuOpcode.F2I, FpuOpcode.CMP).map(input.opcode === _).orR + val coreRspHit = List(FpuOpcode.STORE, FpuOpcode.F2I, FpuOpcode.CMP, FpuOpcode.I2F, FpuOpcode.MIN_MAX, FpuOpcode.SGNJ, FpuOpcode.FMV_X_W, FpuOpcode.FMV_W_X).map(input.opcode === _).orR val coreRsp = Stream(StoreInput()) input.ready setWhen(coreRspHit && coreRsp.ready) coreRsp.valid := input.valid && coreRspHit @@ -262,17 +274,9 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ coreRsp.opcode := read.output.opcode coreRsp.rs1 := read.output.rs1 coreRsp.rs2 := read.output.rs2 - - - val i2fHit = input.opcode === p.Opcode.I2F - val i2f = Stream(I2fInput()) - i2f.valid := input.valid && i2fHit - input.ready setWhen(i2fHit && i2f.ready) - i2f.source := read.output.source - i2f.rd := read.output.rd - i2f.value := read.output.value - i2f.lockId := read.output.lockId - + coreRsp.lockId := read.output.lockId + coreRsp.rd := read.output.rd + coreRsp.value := read.output.value val divSqrtHit = input.opcode === p.Opcode.DIV || input.opcode === p.Opcode.SQRT val divSqrt = Stream(DivSqrtInput()) @@ -319,21 +323,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ } } - val i2f = new Area{ - val input = decode.i2f.stage() - val output = input.swapPayload(WriteInput()) - - val iLog2 = OHToUInt(OHMasking.last(input.value)) - val shifted = (input.value << p.internalMantissaSize) >> iLog2 - - output.source := input.source - output.lockId := input.lockId - output.rd := input.rd - output.value.sign := False - output.value.exponent := iLog2 +^ exponentOne - output.value.mantissa := U(shifted).resized - } - val load = new Area{ @@ -351,9 +340,13 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ } - val coreRsp = new Area{ + + + val shortPip = new Area{ val input = decode.coreRsp.stage() + val rfOutput = Stream(WriteInput()) + val result = p.storeLoadType().assignDontCare() val storeResult = input.rs2.asBits @@ -361,6 +354,9 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ val f2iShifted = (U"1" @@ input.rs1.mantissa) << (f2iShift.resize(5 bits)) val f2iResult = f2iShifted.asBits >> p.internalMantissaSize + val i2fLog2 = OHToUInt(OHMasking.last(input.value)) + val i2fShifted = (input.value << p.internalMantissaSize) >> i2fLog2 + val rs1Equal = input.rs1 === input.rs2 val rs1AbsSmaller = (input.rs1.exponent @@ input.rs1.mantissa) < (input.rs2.exponent @@ input.rs2.mantissa) val rs1Smaller = (input.rs1.sign ## input.rs2.sign).mux( @@ -369,18 +365,47 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ 2 -> True, 3 -> (!rs1AbsSmaller && !rs1Equal) ) + + val minMaxResult = rs1Smaller ? input.rs1 | input.rs2 val cmpResult = B(rs1Smaller) switch(input.opcode){ - is(FpuOpcode.STORE){ result := storeResult } - is(FpuOpcode.F2I) { result := f2iResult } - is(FpuOpcode.CMP) { result := cmpResult.resized } + is(FpuOpcode.STORE) { result := storeResult } + is(FpuOpcode.F2I) { result := f2iResult } + is(FpuOpcode.CMP) { result := cmpResult.resized } //TODO + is(FpuOpcode.FMV_X_W) { result := input.rs1.asBits } //TODO } - input.ready := io.port.map(_.rsp.ready).read(input.source) + val toFpuRf = List(FpuOpcode.MIN_MAX, FpuOpcode.I2F, FpuOpcode.SGNJ, FpuOpcode.FMV_W_X).map(input.opcode === _).orR + + rfOutput.valid := input.valid && toFpuRf + rfOutput.source := input.source + rfOutput.lockId := input.lockId + rfOutput.rd := input.rd + rfOutput.value.assignDontCare() + switch(input.opcode){ + is(FpuOpcode.I2F){ + rfOutput.value.sign := False + rfOutput.value.exponent := i2fLog2 +^ exponentOne + rfOutput.value.mantissa := U(i2fShifted).resized + } + is(FpuOpcode.MIN_MAX){ + rfOutput.value := minMaxResult + } + is(FpuOpcode.SGNJ){ + rfOutput.value.sign := input.rs2.sign + rfOutput.value.exponent := input.rs1.exponent + rfOutput.value.mantissa := input.rs1.mantissa + } + is(FpuOpcode.FMV_W_X){ + rfOutput.value.assignFromBits(input.value) //TODO + } + } + + input.ready := (toFpuRf ? rfOutput.ready | io.port.map(_.rsp.ready).read(input.source)) for(i <- 0 until portCount){ def rsp = io.port(i).rsp - rsp.valid := input.valid && input.source === i + rsp.valid := input.valid && input.source === i && !toFpuRf rsp.value := result } } @@ -650,7 +675,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ val write = new Area{ - val arbitrated = StreamArbiterFactory.lowerFirst.noLock.on(List(load.output, add.output, mul.output, i2f.output)) + val arbitrated = StreamArbiterFactory.lowerFirst.noLock.on(List(load.output, add.output, mul.output, shortPip.rfOutput)) val isCommited = rf.lock.map(_.commited).read(arbitrated.lockId) val commited = arbitrated.haltWhen(!isCommited).toFlow @@ -692,14 +717,14 @@ object FpuSynthesisBench extends App{ withDouble = false ) ) - rtls += new Fpu( - "64", - portCount = 1, - FpuParameter( - internalMantissaSize = 52, - withDouble = true - ) - ) +// rtls += new Fpu( +// "64", +// portCount = 1, +// FpuParameter( +// internalMantissaSize = 52, +// withDouble = true +// ) +// ) val targets = XilinxStdTargets()// ++ AlteraStdTargets() diff --git a/src/main/scala/vexriscv/ip/fpu/Interface.scala b/src/main/scala/vexriscv/ip/fpu/Interface.scala index 0719bff..1003fdb 100644 --- a/src/main/scala/vexriscv/ip/fpu/Interface.scala +++ b/src/main/scala/vexriscv/ip/fpu/Interface.scala @@ -31,7 +31,7 @@ case class FpuFloat(exponentSize: Int, } object FpuOpcode extends SpinalEnum{ - val LOAD, STORE, MUL, ADD, FMA, I2F, F2I, CMP, DIV, SQRT = newElement() + val LOAD, STORE, MUL, ADD, FMA, I2F, F2I, CMP, DIV, SQRT, MIN_MAX, SGNJ, FMV_X_W, FMV_W_X = newElement() } object FpuFormat extends SpinalEnum{ diff --git a/src/test/scala/vexriscv/ip/fpu/FpuTest.scala b/src/test/scala/vexriscv/ip/fpu/FpuTest.scala index b1645a4..f2904f7 100644 --- a/src/test/scala/vexriscv/ip/fpu/FpuTest.scala +++ b/src/test/scala/vexriscv/ip/fpu/FpuTest.scala @@ -207,6 +207,64 @@ class FpuTest extends FunSuite{ cmd.load #= false } } + + def fmv_x_w(rs1 : Int)(body : FpuRsp => Unit): Unit ={ + cmdQueue += {cmd => + cmd.opcode #= cmd.opcode.spinalEnum.FMV_X_W + cmd.value.randomize() + cmd.rs1 #= rs1 + cmd.rs2.randomize() + cmd.rs3.randomize() + cmd.rd.randomize() + } + rspQueue += body + } + + def fmv_w_x(rd : Int, value : Int): Unit ={ + cmdQueue += {cmd => + cmd.opcode #= cmd.opcode.spinalEnum.FMV_W_X + cmd.value #= value.toLong & 0xFFFFFFFFl + cmd.rs1.randomize() + cmd.rs2.randomize() + cmd.rs3.randomize() + cmd.rd #= rd + } + commitQueue += {cmd => + cmd.write #= true + cmd.load #= false + } + } + + def min(rd : Int, rs1 : Int, rs2 : Int): Unit ={ + cmdQueue += {cmd => + cmd.opcode #= cmd.opcode.spinalEnum.MIN_MAX + cmd.value.randomize() + cmd.rs1 #= rs1 + cmd.rs2 #= rs2 + cmd.rs3.randomize() + cmd.rd #= rd + } + commitQueue += {cmd => + cmd.write #= true + cmd.load #= false + } + } + + + def sgnj(rd : Int, rs1 : Int, rs2 : Int): Unit ={ + cmdQueue += {cmd => + cmd.opcode #= cmd.opcode.spinalEnum.SGNJ + cmd.value.randomize() + cmd.rs1 #= rs1 + cmd.rs2 #= rs2 + cmd.rs3.randomize() + cmd.rd #= rd + } + commitQueue += {cmd => + cmd.write #= true + cmd.load #= false + } + } } @@ -358,9 +416,90 @@ class FpuTest extends FunSuite{ } } + def testFmv_x_w(a : Float): Unit ={ + val rs = new RegAllocator() + val rs1, rs2, rs3 = rs.allocate() + val rd = Random.nextInt(32) + load(rs1, a) + fmv_x_w(rs1){rsp => + val ref = lang.Float.floatToIntBits(a).toLong & 0xFFFFFFFFl + val v = rsp.value.toBigInt + println(f"fmv_x_w $a = $v, $ref") + assert(v === ref) + } + } + + def testFmv_w_x(a : Int): Unit ={ + val rs = new RegAllocator() + val rs1, rs2, rs3 = rs.allocate() + val rd = Random.nextInt(32) + fmv_w_x(rd, a) + storeFloat(rd){v => + val ref = lang.Float.intBitsToFloat(a) + println(f"fmv_w_x $a = $v, $ref") + assert(v === ref) + } + } + + + + def testMin(a : Float, b : Float): Unit ={ + val rs = new RegAllocator() + val rs1, rs2, rs3 = rs.allocate() + val rd = Random.nextInt(32) + load(rs1, a) + load(rs2, b) + + min(rd,rs1,rs2) + storeFloat(rd){v => + val ref = a min b + println(f"min $a $b = $v, $ref") + assert(ref == v) + } + } + + def testSgnj(a : Float, b : Float): Unit ={ + val rs = new RegAllocator() + val rs1, rs2, rs3 = rs.allocate() + val rd = Random.nextInt(32) + load(rs1, a) + load(rs2, b) + + sgnj(rd,rs1,rs2) + storeFloat(rd){v => + val ref = a * a.signum * b.signum + println(f"sgnf $a $b = $v, $ref") + assert(ref == v) + } + } + + val b2f = lang.Float.intBitsToFloat(_) + testFmv_x_w(1.246f) + testFmv_w_x(lang.Float.floatToIntBits(7.234f)) + + testMin(1.0f, 2.0f) + testMin(1.5f, 2.0f) + testMin(1.5f, 3.5f) + testMin(1.5f, 1.5f) + testMin(1.5f, -1.5f) + testMin(-1.5f, 1.5f) + testMin(-1.5f, -1.5f) + testMin(1.5f, -3.5f) + + testSgnj(1.0f, 2.0f) + testSgnj(1.5f, 2.0f) + testSgnj(1.5f, 3.5f) + testSgnj(1.5f, 1.5f) + testSgnj(1.5f, -1.5f) + testSgnj(-1.5f, 1.5f) + testSgnj(-1.5f, -1.5f) + testSgnj(1.5f, -3.5f) + + + //TODO Test corner cases testI2f(17) testI2f(12) @@ -442,6 +581,12 @@ class FpuTest extends FunSuite{ tests += (() =>{testDiv(randomFloat(), randomFloat())}) tests += (() =>{testSqrt(randomFloat().abs)}) tests += (() =>{testCmp(randomFloat(), randomFloat())}) + tests += (() =>{testFmv_x_w(randomFloat())}) + tests += (() =>{testFmv_w_x(lang.Float.floatToIntBits(randomFloat()))}) + tests += (() =>{testMin(randomFloat(), randomFloat())}) + tests += (() =>{testSgnj(randomFloat(), randomFloat())}) + + tests.randomPick().apply() } waitUntil(cpu.rspQueue.isEmpty)