FPU sqrt functional

This commit is contained in:
Dolu1990 2021-01-14 18:33:24 +01:00
parent 85dd5dbf8e
commit 04499c0b76
3 changed files with 74 additions and 37 deletions

View File

@ -7,7 +7,7 @@ import spinal.lib.eda.bench.{Bench, Rtl, XilinxStdTargets}
import scala.collection.mutable.ArrayBuffer
// Enumeration of the state names used by the divide / square-root iteration state machine
// (the `switch(state)` further down in FpuCore matches on these elements).
// NOTE(review): this excerpt is a rendered commit diff whose +/- markers were stripped, so the
// two `val` lines below are presumably the pre-commit and post-commit versions of the same
// declaration rather than two live declarations — confirm against the repository.
object FpuDivSqrtIterationState extends SpinalEnum{
// Variant without the `_15_XYY2` element.
val IDLE, YY, XYY, Y2_XYY, DIV, Y_15_XYY2, Y_15_XYY2_RESULT, SQRT = newElement()
// Variant that inserts `_15_XYY2` between `DIV` and `Y_15_XYY2`.
val IDLE, YY, XYY, Y2_XYY, DIV, _15_XYY2, Y_15_XYY2, Y_15_XYY2_RESULT, SQRT = newElement()
}
case class FpuCore(p : FpuParameter) extends Component{
@ -117,9 +117,13 @@ case class FpuCore(p : FpuParameter) extends Component{
useRs2 := True
useRd := True
}
is(p.Opcode.DIV_SQRT){
is(p.Opcode.DIV){
useRs1 := True
useRs2 := True
useRd := True
}
is(p.Opcode.SQRT){
useRs1 := True
useRs2 := True //TODO
useRd := True
}
is(p.Opcode.FMA){
@ -174,7 +178,7 @@ case class FpuCore(p : FpuParameter) extends Component{
store.source := read.output.source
store.rs2 := read.output.rs2
val divSqrtHit = input.opcode === p.Opcode.DIV_SQRT
val divSqrtHit = input.opcode === p.Opcode.DIV || input.opcode === p.Opcode.SQRT
val divSqrt = Stream(DivSqrtInput())
input.ready setWhen(divSqrtHit && divSqrt.ready)
divSqrt.valid := input.valid && divSqrtHit
@ -183,7 +187,7 @@ case class FpuCore(p : FpuParameter) extends Component{
divSqrt.rs2 := read.output.rs2
divSqrt.rd := read.output.rd
divSqrt.lockId := read.output.lockId
divSqrt.div := True //TODO
divSqrt.div := input.opcode === p.Opcode.DIV
val fmaHit = input.opcode === p.Opcode.FMA
val mulHit = input.opcode === p.Opcode.MUL || fmaHit
@ -315,14 +319,21 @@ case class FpuCore(p : FpuParameter) extends Component{
val rom = Mem(UInt(aproxWidth bits), aproxDepth * 2)
val divTable, sqrtTable = ArrayBuffer[Double]()
for(i <- 0 until aproxDepth){
val mantissa = 1+(i+0.5)/aproxDepth
divTable += 1/mantissa
sqrtTable += 1/Math.sqrt(mantissa)
val value = 1+(i+0.5)/aproxDepth
divTable += 1/value
}
for(i <- 0 until aproxDepth){
val scale = if(i < aproxDepth/2) 2 else 1
val value = scale+(scale*(i%(aproxDepth/2)+0.5)/aproxDepth*2)
// println(s"$i => $value" )
sqrtTable += 1/Math.sqrt(value)
}
val romElaboration = (sqrtTable ++ divTable).map(v => BigInt(((v-0.5)*2*(1 << aproxWidth)).round))
rom.initBigInt(romElaboration)
val address = U(input.div ## (input.div ? input.rs2.mantissa | input.rs1.mantissa).takeHigh(log2Up(aproxDepth)))
val div = input.rs2.mantissa.takeHigh(log2Up(aproxDepth))
val sqrt = U(input.rs1.exponent.lsb ## input.rs1.mantissa).takeHigh(log2Up(aproxDepth))
val address = U(input.div ## (input.div ? div | sqrt))
val raw = rom.readAsync(address)
val result = U"01" @@ (raw << (mulWidth-aproxWidth-2))
}
@ -331,7 +342,7 @@ case class FpuCore(p : FpuParameter) extends Component{
val value = (1 << p.internalExponentSize) - 3 - input.rs2.exponent
}
// Exponent-derived value for the square-root path: a constant built from
// p.internalExponentSize, minus half of the operand exponent, plus a one-bit correction taken
// from the exponent's least significant bit.
// Scala precedence reminder: `-` binds tighter than `<<`, so `1 << p.internalExponentSize-1`
// means `1 << (p.internalExponentSize - 1)`.
// NOTE(review): this excerpt is a rendered commit diff whose +/- markers were stripped, so the
// two `val value` lines are presumably the pre-commit and post-commit versions of one
// declaration — confirm against the repository.
val sqrtExp = new Area{
// Variant reading rs2: constant ends in `- 2`, and the exponent LSB is added as-is.
val value = ((1 << p.internalExponentSize-1) + (1 << p.internalExponentSize-2) - 2) - (input.rs2.exponent >> 1) + input.rs2.exponent.lsb.asUInt
// Variant reading rs1: constant is one lower (`- 2 -1`), and the *inverted* exponent LSB is added.
val value = ((1 << p.internalExponentSize-1) + (1 << p.internalExponentSize-2) - 2 -1) - (input.rs1.exponent >> 1) + U(!input.rs1.exponent.lsb)
}
def mulArg(rs1 : UInt, rs2 : UInt): Unit ={
@ -345,7 +356,6 @@ case class FpuCore(p : FpuParameter) extends Component{
mulBuffer.ready := False
val iterationValue = Reg(UInt(mulWidth bits))
//val squareInput = (iteration === 0) ? aprox.result | iterationValue
input.ready := False
switch(state){
@ -365,13 +375,12 @@ case class FpuCore(p : FpuParameter) extends Component{
}
is(XYY){
decode.divSqrtToMul.valid := mulBuffer.valid
mulArg(U"1" @@ (input.div ? input.rs2.mantissa | input.rs1.mantissa), mulBuffer.payload)
val sqrtIn = !input.rs1.exponent.lsb ? (U"1" @@ input.rs1.mantissa) | ((U"1" @@ input.rs1.mantissa) |>> 1)
val divIn = U"1" @@ input.rs2.mantissa
mulArg(input.div ? divIn| sqrtIn, mulBuffer.payload)
when(mulBuffer.valid && decode.divSqrtToMul.ready) {
state := (input.div ? Y2_XYY | Y_15_XYY2)
mulBuffer.ready := input.div
when(!input.div){
mulBuffer.payload.getDrivingReg := (U"11" << mulWidth-2) - (mulBuffer.payload >> 1)
}
state := (input.div ? Y2_XYY | _15_XYY2)
mulBuffer.ready := True
}
}
is(Y2_XYY){
@ -399,25 +408,25 @@ case class FpuCore(p : FpuParameter) extends Component{
input.ready := True
}
}
is(_15_XYY2){
when(mulBuffer.valid) {
state := Y_15_XYY2
mulBuffer.payload.getDrivingReg := (U"11" << mulWidth-2) - (mulBuffer.payload)
}
}
is(Y_15_XYY2){
decode.divSqrtToMul.valid := True
mulArg(U"1" @@ input.rs1.mantissa, mulBuffer.payload)
mulArg(iterationValue, mulBuffer.payload)
when(decode.divSqrtToMul.ready) {
mulBuffer.ready := True
state := SQRT
state := Y_15_XYY2_RESULT
}
}
is(Y_15_XYY2_RESULT){
when(iteration =/= sqrtIterationCount-1 && !input.rs1.exponent.lsb) {
iterationValue := mulBuffer.payload
} otherwise {
val v = 1.0/Math.sqrt(2.0)
val scaled = v* (BigInt(1) << mulWidth-1).toDouble
val bigInt = BigDecimal(scaled).toBigInt()
iterationValue := mulBuffer.payload + U(bigInt)
}
iterationValue := mulBuffer.payload
mulBuffer.ready := True
when(mulBuffer.valid) {
iteration := iteration + 1
when(iteration =/= sqrtIterationCount-1){
state := YY
} otherwise {

View File

@ -23,7 +23,7 @@ case class FpuFloat(exponentSize: Int,
}
// Enumeration of the FPU command opcodes.
// NOTE(review): this excerpt is a rendered commit diff whose +/- markers were stripped, so the
// two `val` lines below are presumably the pre-commit and post-commit versions of the same
// declaration — confirm against the repository.
case class FpuOpcode(p : FpuParameter) extends SpinalEnum{
// Variant with a single combined DIV_SQRT opcode.
val LOAD, STORE, MUL, ADD, FMA, I2F, F2I, CMP, DIV_SQRT = newElement()
// Variant with separate DIV and SQRT opcodes.
val LOAD, STORE, MUL, ADD, FMA, I2F, F2I, CMP, DIV, SQRT = newElement()
}
case class FpuParameter( internalMantissaSize : Int,

View File

@ -99,7 +99,7 @@ class FpuTest extends FunSuite{
def div(rd : Int, rs1 : Int, rs2 : Int): Unit ={
cmdQueue += {cmd =>
cmd.source #= id
cmd.opcode #= cmd.opcode.spinalEnum.DIV_SQRT
cmd.opcode #= cmd.opcode.spinalEnum.DIV
cmd.value.randomize()
cmd.rs1 #= rs1
cmd.rs2 #= rs2
@ -108,6 +108,18 @@ class FpuTest extends FunSuite{
}
}
// Enqueues one SQRT command for the driver.
// rd  : index written to the command's destination-register field.
// rs1 : index written to the command's first source-register field.
// The `value`, `rs2` and `rs3` fields are filled with random data. The statements stay in this
// order so the randomize() calls draw from the simulation RNG in an unchanged sequence.
def sqrt(rd : Int, rs1 : Int): Unit ={
  cmdQueue += { request =>
    request.source #= id
    request.opcode #= request.opcode.spinalEnum.SQRT
    request.value.randomize()
    request.rs1 #= rs1
    request.rs2.randomize()
    request.rs3.randomize()
    request.rd #= rd
  }
}
def fma(rd : Int, rs1 : Int, rs2 : Int, rs3 : Int): Unit ={
cmdQueue += {cmd =>
cmd.source #= id
@ -175,7 +187,8 @@ class FpuTest extends FunSuite{
}
// Produces a random Float operand for the randomized tests.
// NOTE(review): this excerpt is a rendered commit diff whose +/- markers were stripped; the
// first body line is presumably the pre-commit implementation and the following two lines the
// post-commit one — confirm against the repository. Read literally as Scala, the first
// expression's value would simply be discarded.
def randomFloat(): Float ={
// Magnitude in [0, 100) with a random sign.
Random.nextFloat() * 1e2f * (if(Random.nextBoolean()) -1f else 1f)
// Binary exponent drawn from -5 to 4 inclusive (nextInt(10) yields 0..9).
val exp = Random.nextInt(10)-5
// Magnitude in [0, 2^exp) with a random sign, computed in Double and then narrowed to Float.
(Random.nextDouble() * (Math.pow(2.0, exp)) * (if(Random.nextBoolean()) -1.0 else 1.0)).toFloat
}
def testAdd(a : Float, b : Float): Unit ={
@ -219,9 +232,9 @@ class FpuTest extends FunSuite{
fma(rd,rs1,rs2,rs3)
storeFloat(rd){v =>
val ref = a * b + c
println(f"$a * $b + $c = $v, $ref")
assert(checkFloat(ref, v))
val ref = a.toDouble * b.toDouble + c.toDouble
println(f"$a%.20f * $b%.20f + $c%.20f = $v%.20f, $ref%.20f")
assert(checkFloat(ref.toFloat, v))
}
}
@ -248,7 +261,7 @@ class FpuTest extends FunSuite{
val rd = Random.nextInt(32)
load(rs1, a)
div(rd,rs1,rs2)
sqrt(rd,rs1)
storeFloat(rd){v =>
val ref = Math.sqrt(a).toFloat
val error = Math.abs(ref-v)/ref
@ -260,8 +273,20 @@ class FpuTest extends FunSuite{
val b2f = lang.Float.intBitsToFloat(_)
// testSqrt(2.25f)
// dut.clockDomain.waitSampling(100)
testSqrt(1.5625f)
testSqrt(1.5625f*2)
testSqrt(1.8f)
testSqrt(4.4f)
testSqrt(0.3f)
testSqrt(1.5625f*2)
testSqrt(b2f(0x3f7ffffe))
testSqrt(b2f(0x3f7fffff))
testSqrt(b2f(0x3f800000))
testSqrt(b2f(0x3f800001))
testSqrt(b2f(0x3f800002))
testSqrt(b2f(0x3f800003))
// dut.clockDomain.waitSampling(1000)
// simFailure()
testAdd(0.1f, 1.6f)
@ -286,16 +311,19 @@ class FpuTest extends FunSuite{
// 1000 randomized fused multiply-add checks.
for(i <- 0 until 1000){
testFma(randomFloat(), randomFloat(), randomFloat())
}
// 1000 randomized division checks.
for(i <- 0 until 1000){
testDiv(randomFloat(), randomFloat())
}
// 1000 randomized square-root checks; Math.abs keeps the operand non-negative.
for(i <- 0 until 1000){
testSqrt(Math.abs(randomFloat())) //TODO
}
// 1000 rounds of mixed testing: each round builds a list of five closures (add, mul, fma, div,
// sqrt with a non-negative operand) and runs the one returned by randomPick().
// NOTE(review): randomPick is not defined in this excerpt; presumably it selects one element at
// random — confirm against the SpinalHDL library.
for(i <- 0 until 1000){
val tests = ArrayBuffer[() => Unit]()
tests += (() =>{testAdd(randomFloat(), randomFloat())})
tests += (() =>{testMul(randomFloat(), randomFloat())})
tests += (() =>{testFma(randomFloat(), randomFloat(), randomFloat())})
tests += (() =>{testDiv(randomFloat(), randomFloat())})
tests += (() =>{testSqrt(randomFloat().abs)})
tests.randomPick().apply()
}
waitUntil(cpu.rspQueue.isEmpty)