diff --git a/src/main/scala/vexriscv/ip/fpu/FpuCore.scala b/src/main/scala/vexriscv/ip/fpu/FpuCore.scala index 8f7657a..8209c11 100644 --- a/src/main/scala/vexriscv/ip/fpu/FpuCore.scala +++ b/src/main/scala/vexriscv/ip/fpu/FpuCore.scala @@ -7,7 +7,7 @@ import spinal.lib.eda.bench.{Bench, Rtl, XilinxStdTargets} import scala.collection.mutable.ArrayBuffer object FpuDivSqrtIterationState extends SpinalEnum{ - val IDLE, YY, XYY, Y2_XYY, DIV, Y_15_XYY2, Y_15_XYY2_RESULT, SQRT = newElement() + val IDLE, YY, XYY, Y2_XYY, DIV, _15_XYY2, Y_15_XYY2, Y_15_XYY2_RESULT, SQRT = newElement() } case class FpuCore(p : FpuParameter) extends Component{ @@ -117,9 +117,13 @@ case class FpuCore(p : FpuParameter) extends Component{ useRs2 := True useRd := True } - is(p.Opcode.DIV_SQRT){ + is(p.Opcode.DIV){ + useRs1 := True + useRs2 := True + useRd := True + } + is(p.Opcode.SQRT){ useRs1 := True - useRs2 := True //TODO useRd := True } is(p.Opcode.FMA){ @@ -174,7 +178,7 @@ case class FpuCore(p : FpuParameter) extends Component{ store.source := read.output.source store.rs2 := read.output.rs2 - val divSqrtHit = input.opcode === p.Opcode.DIV_SQRT + val divSqrtHit = input.opcode === p.Opcode.DIV || input.opcode === p.Opcode.SQRT val divSqrt = Stream(DivSqrtInput()) input.ready setWhen(divSqrtHit && divSqrt.ready) divSqrt.valid := input.valid && divSqrtHit @@ -183,7 +187,7 @@ case class FpuCore(p : FpuParameter) extends Component{ divSqrt.rs2 := read.output.rs2 divSqrt.rd := read.output.rd divSqrt.lockId := read.output.lockId - divSqrt.div := True //TODO + divSqrt.div := input.opcode === p.Opcode.DIV val fmaHit = input.opcode === p.Opcode.FMA val mulHit = input.opcode === p.Opcode.MUL || fmaHit @@ -315,14 +319,21 @@ case class FpuCore(p : FpuParameter) extends Component{ val rom = Mem(UInt(aproxWidth bits), aproxDepth * 2) val divTable, sqrtTable = ArrayBuffer[Double]() for(i <- 0 until aproxDepth){ - val mantissa = 1+(i+0.5)/aproxDepth - divTable += 1/mantissa - sqrtTable += 1/Math.sqrt(mantissa) + val value = 1+(i+0.5)/aproxDepth + divTable += 1/value + } + for(i <- 0 until aproxDepth){ + val scale = if(i < aproxDepth/2) 2 else 1 + val value = scale+(scale*(i%(aproxDepth/2)+0.5)/aproxDepth*2) +// println(s"$i => $value" ) + sqrtTable += 1/Math.sqrt(value) } val romElaboration = (sqrtTable ++ divTable).map(v => BigInt(((v-0.5)*2*(1 << aproxWidth)).round)) rom.initBigInt(romElaboration) - val address = U(input.div ## (input.div ? input.rs2.mantissa | input.rs1.mantissa).takeHigh(log2Up(aproxDepth))) + val div = input.rs2.mantissa.takeHigh(log2Up(aproxDepth)) + val sqrt = U(input.rs1.exponent.lsb ## input.rs1.mantissa).takeHigh(log2Up(aproxDepth)) + val address = U(input.div ## (input.div ? div | sqrt)) val raw = rom.readAsync(address) val result = U"01" @@ (raw << (mulWidth-aproxWidth-2)) } @@ -331,7 +342,7 @@ case class FpuCore(p : FpuParameter) extends Component{ val value = (1 << p.internalExponentSize) - 3 - input.rs2.exponent } val sqrtExp = new Area{ - val value = ((1 << p.internalExponentSize-1) + (1 << p.internalExponentSize-2) - 2) - (input.rs2.exponent >> 1) + input.rs2.exponent.lsb.asUInt + val value = ((1 << p.internalExponentSize-1) + (1 << p.internalExponentSize-2) - 2 -1) - (input.rs1.exponent >> 1) + U(!input.rs1.exponent.lsb) } def mulArg(rs1 : UInt, rs2 : UInt): Unit ={ @@ -345,7 +356,6 @@ case class FpuCore(p : FpuParameter) extends Component{ mulBuffer.ready := False val iterationValue = Reg(UInt(mulWidth bits)) - //val squareInput = (iteration === 0) ? aprox.result | iterationValue input.ready := False switch(state){ @@ -365,13 +375,12 @@ case class FpuCore(p : FpuParameter) extends Component{ } is(XYY){ decode.divSqrtToMul.valid := mulBuffer.valid - mulArg(U"1" @@ (input.div ? input.rs2.mantissa | input.rs1.mantissa), mulBuffer.payload) + val sqrtIn = !input.rs1.exponent.lsb ? (U"1" @@ input.rs1.mantissa) | ((U"1" @@ input.rs1.mantissa) |>> 1) + val divIn = U"1" @@ input.rs2.mantissa + mulArg(input.div ? divIn| sqrtIn, mulBuffer.payload) when(mulBuffer.valid && decode.divSqrtToMul.ready) { - state := (input.div ? Y2_XYY | Y_15_XYY2) - mulBuffer.ready := input.div - when(!input.div){ - mulBuffer.payload.getDrivingReg := (U"11" << mulWidth-2) - (mulBuffer.payload >> 1) - } + state := (input.div ? Y2_XYY | _15_XYY2) + mulBuffer.ready := True } } is(Y2_XYY){ @@ -399,25 +408,25 @@ case class FpuCore(p : FpuParameter) extends Component{ input.ready := True } } + is(_15_XYY2){ + when(mulBuffer.valid) { + state := Y_15_XYY2 + mulBuffer.payload.getDrivingReg := (U"11" << mulWidth-2) - (mulBuffer.payload) + } + } is(Y_15_XYY2){ decode.divSqrtToMul.valid := True - mulArg(U"1" @@ input.rs1.mantissa, mulBuffer.payload) + mulArg(iterationValue, mulBuffer.payload) when(decode.divSqrtToMul.ready) { mulBuffer.ready := True - state := SQRT + state := Y_15_XYY2_RESULT } } is(Y_15_XYY2_RESULT){ - when(iteration =/= sqrtIterationCount-1 && !input.rs1.exponent.lsb) { - iterationValue := mulBuffer.payload - } otherwise { - val v = 1.0/Math.sqrt(2.0) - val scaled = v* (BigInt(1) << mulWidth-1).toDouble - val bigInt = BigDecimal(scaled).toBigInt() - iterationValue := mulBuffer.payload + U(bigInt) - } + iterationValue := mulBuffer.payload mulBuffer.ready := True when(mulBuffer.valid) { + iteration := iteration + 1 when(iteration =/= sqrtIterationCount-1){ state := YY } otherwise { diff --git a/src/main/scala/vexriscv/ip/fpu/Interface.scala b/src/main/scala/vexriscv/ip/fpu/Interface.scala index b07c18c..2c5b3f1 100644 --- a/src/main/scala/vexriscv/ip/fpu/Interface.scala +++ b/src/main/scala/vexriscv/ip/fpu/Interface.scala @@ -23,7 +23,7 @@ case class FpuFloat(exponentSize: Int, } case class FpuOpcode(p : FpuParameter) extends SpinalEnum{ - val LOAD, STORE, MUL, ADD, FMA, I2F, F2I, CMP, DIV_SQRT = newElement() + val LOAD, STORE, MUL, ADD, FMA, I2F, F2I, CMP, DIV, SQRT = newElement() } case class FpuParameter( internalMantissaSize : Int, diff --git a/src/test/scala/vexriscv/ip/fpu/FpuTest.scala b/src/test/scala/vexriscv/ip/fpu/FpuTest.scala index 68c3e46..0ca5ca7 100644 --- a/src/test/scala/vexriscv/ip/fpu/FpuTest.scala +++ b/src/test/scala/vexriscv/ip/fpu/FpuTest.scala @@ -99,7 +99,7 @@ class FpuTest extends FunSuite{ def div(rd : Int, rs1 : Int, rs2 : Int): Unit ={ cmdQueue += {cmd => cmd.source #= id - cmd.opcode #= cmd.opcode.spinalEnum.DIV_SQRT + cmd.opcode #= cmd.opcode.spinalEnum.DIV cmd.value.randomize() cmd.rs1 #= rs1 cmd.rs2 #= rs2 @@ -108,6 +108,18 @@ class FpuTest extends FunSuite{ } } + def sqrt(rd : Int, rs1 : Int): Unit ={ + cmdQueue += {cmd => + cmd.source #= id + cmd.opcode #= cmd.opcode.spinalEnum.SQRT + cmd.value.randomize() + cmd.rs1 #= rs1 + cmd.rs2.randomize() + cmd.rs3.randomize() + cmd.rd #= rd + } + } + def fma(rd : Int, rs1 : Int, rs2 : Int, rs3 : Int): Unit ={ cmdQueue += {cmd => cmd.source #= id @@ -175,7 +187,8 @@ class FpuTest extends FunSuite{ } def randomFloat(): Float ={ - Random.nextFloat() * 1e2f * (if(Random.nextBoolean()) -1f else 1f) + val exp = Random.nextInt(10)-5 + (Random.nextDouble() * (Math.pow(2.0, exp)) * (if(Random.nextBoolean()) -1.0 else 1.0)).toFloat } def testAdd(a : Float, b : Float): Unit ={ @@ -219,9 +232,9 @@ class FpuTest extends FunSuite{ fma(rd,rs1,rs2,rs3) storeFloat(rd){v => - val ref = a * b + c - println(f"$a * $b + $c = $v, $ref") - assert(checkFloat(ref, v)) + val ref = a.toDouble * b.toDouble + c.toDouble + println(f"$a%.20f * $b%.20f + $c%.20f = $v%.20f, $ref%.20f") + assert(checkFloat(ref.toFloat, v)) } } @@ -248,7 +261,7 @@ class FpuTest extends FunSuite{ val rd = Random.nextInt(32) load(rs1, a) - div(rd,rs1,rs2) + sqrt(rd,rs1) storeFloat(rd){v => val ref = Math.sqrt(a).toFloat val error = Math.abs(ref-v)/ref @@ -260,8 +273,20 @@ class FpuTest extends FunSuite{ val b2f = lang.Float.intBitsToFloat(_) -// testSqrt(2.25f) -// dut.clockDomain.waitSampling(100) + testSqrt(1.5625f) + testSqrt(1.5625f*2) + testSqrt(1.8f) + testSqrt(4.4f) + testSqrt(0.3f) + testSqrt(1.5625f*2) + testSqrt(b2f(0x3f7ffffe)) + testSqrt(b2f(0x3f7fffff)) + testSqrt(b2f(0x3f800000)) + testSqrt(b2f(0x3f800001)) + testSqrt(b2f(0x3f800002)) + testSqrt(b2f(0x3f800003)) + + // dut.clockDomain.waitSampling(1000) // simFailure() testAdd(0.1f, 1.6f) @@ -286,16 +311,19 @@ class FpuTest extends FunSuite{ for(i <- 0 until 1000){ testFma(randomFloat(), randomFloat(), randomFloat()) } - for(i <- 0 until 1000){ testDiv(randomFloat(), randomFloat()) } + for(i <- 0 until 1000){ + testSqrt(Math.abs(randomFloat())) //TODO + } for(i <- 0 until 1000){ val tests = ArrayBuffer[() => Unit]() tests += (() =>{testAdd(randomFloat(), randomFloat())}) tests += (() =>{testMul(randomFloat(), randomFloat())}) tests += (() =>{testFma(randomFloat(), randomFloat(), randomFloat())}) tests += (() =>{testDiv(randomFloat(), randomFloat())}) + tests += (() =>{testSqrt(randomFloat().abs)}) tests.randomPick().apply() } waitUntil(cpu.rspQueue.isEmpty)