fpu add rounding is ok, except for infinity results
This commit is contained in:
parent
1ae84ea83b
commit
fc3e6a6d0a
|
@ -873,23 +873,24 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val rs1MantissaBigger = input.rs1.mantissa > input.rs2.mantissa
|
val rs1MantissaBigger = input.rs1.mantissa > input.rs2.mantissa
|
||||||
val absRs1Bigger = ((rs1ExponentBigger || rs1ExponentEqual && rs1MantissaBigger) && !input.rs1.isZero || input.rs1.isInfinity) && !input.rs2.isInfinity
|
val absRs1Bigger = ((rs1ExponentBigger || rs1ExponentEqual && rs1MantissaBigger) && !input.rs1.isZero || input.rs1.isInfinity) && !input.rs2.isInfinity
|
||||||
val shiftBy = rs1ExponentBigger ? (0-exp21) | exp21
|
val shiftBy = rs1ExponentBigger ? (0-exp21) | exp21
|
||||||
val shiftOverflow = shiftBy >= p.internalMantissaSize
|
val shiftOverflow = (shiftBy >= p.internalMantissaSize+3)
|
||||||
val passThrough = shiftOverflow || (input.rs1.isZero) || (input.rs2.isZero)
|
val passThrough = shiftOverflow || (input.rs1.isZero) || (input.rs2.isZero)
|
||||||
|
|
||||||
//Note that rs1ExponentBigger can be replaced by absRs1Bigger below to avoid the xSigned two's complement in the math block, at the expense of the combinatorial path
|
//Note that rs1ExponentBigger can be replaced by absRs1Bigger below to avoid the xSigned two's complement in the math block, at the expense of the combinatorial path
|
||||||
val xySign = absRs1Bigger ? input.rs1.sign | input.rs2.sign
|
val xySign = absRs1Bigger ? input.rs1.sign | input.rs2.sign
|
||||||
val xSign = xySign ^ (rs1ExponentBigger ? input.rs1.sign | input.rs2.sign)
|
val xSign = xySign ^ (rs1ExponentBigger ? input.rs1.sign | input.rs2.sign)
|
||||||
val ySign = xySign ^ (rs1ExponentBigger ? input.rs2.sign | input.rs1.sign)
|
val ySign = xySign ^ (rs1ExponentBigger ? input.rs2.sign | input.rs1.sign)
|
||||||
val xMantissa = U"1" @@ (rs1ExponentBigger ? input.rs1.mantissa | input.rs2.mantissa) @@ U"0"
|
val xMantissa = U"1" @@ (rs1ExponentBigger ? input.rs1.mantissa | input.rs2.mantissa) @@ U"00"
|
||||||
val yMantissaUnshifted = U"1" @@ (rs1ExponentBigger ? input.rs2.mantissa | input.rs1.mantissa) @@ U"0"
|
val yMantissaUnshifted = U"1" @@ (rs1ExponentBigger ? input.rs2.mantissa | input.rs1.mantissa) @@ U"00"
|
||||||
var yMantissa = yMantissaUnshifted
|
var yMantissa = CombInit(yMantissaUnshifted)
|
||||||
val roundingScrap = CombInit(shiftOverflow)
|
val roundingScrap = CombInit(shiftOverflow)
|
||||||
for(i <- 0 until log2Up(p.internalMantissaSize)){
|
for(i <- 0 until log2Up(p.internalMantissaSize)){
|
||||||
roundingScrap setWhen(shiftBy(i) && yMantissa(0, 1 << i bits) =/= 0)
|
roundingScrap setWhen(shiftBy(i) && yMantissa(0, 1 << i bits) =/= 0)
|
||||||
yMantissa \= shiftBy(i) ? (yMantissa |>> (BigInt(1) << i)) | yMantissa
|
yMantissa \= shiftBy(i) ? (yMantissa |>> (BigInt(1) << i)) | yMantissa
|
||||||
}
|
}
|
||||||
when(passThrough) { yMantissa := 0 }
|
when(passThrough) { yMantissa := 0 }
|
||||||
// val yMantissa = yMantissaUnshifted >> (passThrough.asUInt @@ shiftBy.resize(log2Up(p.internalMantissaSize))) //Maybe passThrough.asUInt @@ do not infer small logic
|
when(shiftOverflow) { roundingScrap := True }
|
||||||
|
when(input.rs1.special || input.rs2.special){ roundingScrap := False }
|
||||||
val xyExponent = rs1ExponentBigger ? input.rs1.exponent | input.rs2.exponent
|
val xyExponent = rs1ExponentBigger ? input.rs1.exponent | input.rs2.exponent
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -901,9 +902,9 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
def xyExponent = shifter.xyExponent
|
def xyExponent = shifter.xyExponent
|
||||||
def xySign = shifter.xySign
|
def xySign = shifter.xySign
|
||||||
|
|
||||||
val xSigned = xMantissa.twoComplement(xSign)
|
val xSigned = xMantissa.twoComplement(xSign) //TODO Is that necessary ?
|
||||||
val ySigned = yMantissa.twoComplement(ySign)
|
val overshot = (ySign && shifter.roundingScrap)
|
||||||
// val ySigned = ((ySign ## Mux(ySign, ~yMantissa, yMantissa)).asUInt +^ (ySign || yMantissa.lsb).asUInt).asSInt //rounding here
|
val ySigned = ((ySign ## Mux(ySign, ~yMantissa, yMantissa)).asUInt + (ySign && !shifter.roundingScrap).asUInt).asSInt //rounding here
|
||||||
val xyMantissa = U(xSigned +^ ySigned).trim(1 bits)
|
val xyMantissa = U(xSigned +^ ySigned).trim(1 bits)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -915,10 +916,12 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val shiftOh = OHMasking.first(xyMantissa.asBools.reverse)
|
val shiftOh = OHMasking.first(xyMantissa.asBools.reverse)
|
||||||
val shift = OHToUInt(shiftOh)
|
val shift = OHToUInt(shiftOh)
|
||||||
val mantissa = (xyMantissa |<< shift)
|
val mantissa = (xyMantissa |<< shift)
|
||||||
|
// val mantissa = ((shifter.roundingScrap.asUInt @@ xyMantissa.reversed) |>> shift).reversed >> 1
|
||||||
val exponent = xyExponent -^ shift + 1
|
val exponent = xyExponent -^ shift + 1
|
||||||
xySign clearWhen(input.rs1.isZero && input.rs2.isZero)
|
xySign clearWhen(input.rs1.isZero && input.rs2.isZero)
|
||||||
val forceZero = xyMantissa === 0 || exponent.msb || (input.rs1.isZero && input.rs2.isZero)
|
val forceZero = xyMantissa === 0 || exponent.msb || (input.rs1.isZero && input.rs2.isZero)
|
||||||
val forceOverflow = exponent === exponentOne + 128 || (input.rs1.isInfinity || input.rs2.isInfinity)
|
val forceOverflow = exponent === exponentOne + 128
|
||||||
|
val forceInfinity = (input.rs1.isInfinity || input.rs2.isInfinity)
|
||||||
val forceNan = input.rs1.isNan || input.rs2.isNan || (input.rs1.isInfinity && input.rs2.isInfinity && (input.rs1.sign ^ input.rs2.sign))
|
val forceNan = input.rs1.isNan || input.rs2.isNan || (input.rs1.isInfinity && input.rs2.isInfinity && (input.rs1.sign ^ input.rs2.sign))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -928,11 +931,11 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
output.lockId := input.lockId
|
output.lockId := input.lockId
|
||||||
output.rd := input.rd
|
output.rd := input.rd
|
||||||
output.value.sign := norm.xySign
|
output.value.sign := norm.xySign
|
||||||
output.value.mantissa := (norm.mantissa >> 2).resized
|
output.value.mantissa := (norm.mantissa >> 3).resized
|
||||||
output.value.exponent := norm.exponent.resized
|
output.value.exponent := norm.exponent.resized
|
||||||
output.value.special := False
|
output.value.special := False
|
||||||
output.roundMode := input.roundMode
|
output.roundMode := input.roundMode
|
||||||
output.round := norm.mantissa(1 downto 0) | (U"0" @@ shifter.roundingScrap)
|
output.round := U(norm.mantissa(2)) @@ U(norm.mantissa(1) | norm.mantissa(0) | shifter.roundingScrap)
|
||||||
|
|
||||||
when(norm.forceNan) {
|
when(norm.forceNan) {
|
||||||
output.value.setNanQuiet
|
output.value.setNanQuiet
|
||||||
|
@ -941,8 +944,25 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
when(norm.xyMantissa === 0 || input.rs1.isZero && input.rs2.isZero){
|
when(norm.xyMantissa === 0 || input.rs1.isZero && input.rs2.isZero){
|
||||||
output.value.sign := input.rs1.sign && input.rs2.sign
|
output.value.sign := input.rs1.sign && input.rs2.sign
|
||||||
}
|
}
|
||||||
} elsewhen(norm.forceOverflow) {
|
when((input.rs1.sign || input.rs2.sign) && input.roundMode === FpuRoundMode.RDN){
|
||||||
|
output.value.sign := True
|
||||||
|
}
|
||||||
|
} elsewhen(norm.forceInfinity) {
|
||||||
output.value.setInfinity
|
output.value.setInfinity
|
||||||
|
} elsewhen(norm.forceOverflow) {
|
||||||
|
val doMax = input.roundMode.mux(
|
||||||
|
FpuRoundMode.RNE -> (True),
|
||||||
|
FpuRoundMode.RTZ -> (True),
|
||||||
|
FpuRoundMode.RDN -> (!output.value.sign),
|
||||||
|
FpuRoundMode.RUP -> (output.value.sign),
|
||||||
|
FpuRoundMode.RMM -> (True)
|
||||||
|
)
|
||||||
|
when(doMax){
|
||||||
|
output.value.exponent := exponentOne + 127
|
||||||
|
output.value.mantissa.setAll()
|
||||||
|
} otherwise {
|
||||||
|
output.value.setInfinity
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2,6 +2,7 @@ package vexriscv.ip.fpu
|
||||||
|
|
||||||
import java.io.File
|
import java.io.File
|
||||||
import java.lang
|
import java.lang
|
||||||
|
import java.util.Scanner
|
||||||
|
|
||||||
import org.apache.commons.io.FileUtils
|
import org.apache.commons.io.FileUtils
|
||||||
import org.scalatest.FunSuite
|
import org.scalatest.FunSuite
|
||||||
|
@ -14,12 +15,15 @@ import spinal.sim.Backend.{isMac, isWindows}
|
||||||
|
|
||||||
import scala.collection.mutable
|
import scala.collection.mutable
|
||||||
import scala.collection.mutable.ArrayBuffer
|
import scala.collection.mutable.ArrayBuffer
|
||||||
|
import scala.sys.process.ProcessLogger
|
||||||
import scala.util.Random
|
import scala.util.Random
|
||||||
|
|
||||||
|
|
||||||
class FpuTest extends FunSuite{
|
class FpuTest extends FunSuite{
|
||||||
|
|
||||||
val b2f = lang.Float.intBitsToFloat(_)
|
val b2f = lang.Float.intBitsToFloat(_)
|
||||||
val f2b = lang.Float.floatToIntBits(_)
|
val f2b = lang.Float.floatToIntBits(_)
|
||||||
|
|
||||||
def clamp(f : Float) = {
|
def clamp(f : Float) = {
|
||||||
f // if(f.abs < b2f(0x00800000)) b2f(f2b(f) & 0x80000000) else f
|
f // if(f.abs < b2f(0x00800000)) b2f(f2b(f) & 0x80000000) else f
|
||||||
}
|
}
|
||||||
|
@ -31,11 +35,41 @@ class FpuTest extends FunSuite{
|
||||||
withDouble = false
|
withDouble = false
|
||||||
)
|
)
|
||||||
|
|
||||||
SimConfig.withFstWave.compile(new FpuCore(portCount, p)).doSim(seed = 42){ dut =>
|
val config = SimConfig
|
||||||
|
// config.withFstWave
|
||||||
|
config.compile(new FpuCore(portCount, p)).doSim(seed = 42){ dut =>
|
||||||
dut.clockDomain.forkStimulus(10)
|
dut.clockDomain.forkStimulus(10)
|
||||||
|
dut.clockDomain.forkSimSpeedPrinter()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/** Bundle of `testfloat_gen` reference streams for one operation, one stream per rounding mode.
  *
  * @param t  operand type prefix used in the generator function name (e.g. "f32")
  * @param op operation suffix used in the generator function name (e.g. "add")
  */
class TestCase(t : String, op : String){
  /** Starts `testfloat_gen` with the given rounding argument and exposes a line parser on the stream. */
  def build(arg : String) = new ProcessStream(s"testfloat_gen $arg -forever -${t}_$op"){
    /** Reads the next generator line: three hexadecimal 32-bit words converted with `b2f`,
      * followed by one hexadecimal integer.
      * NOTE(review): the caller treats these as (operand a, operand b, expected result, flags) - confirm against testfloat_gen output format.
      */
    def f32_2 ={
      val fields = new Scanner(next)
      // Fields must be consumed strictly left to right, one scanner token each.
      def word() = b2f(fields.nextLong(16).toInt)
      val first = word()
      val second = word()
      val third = word()
      val last = fields.nextInt(16)
      (first, second, third, last)
    }
  }

  // One generator process per rounding mode, started eagerly at construction.
  val RNE = build("-rnear_even")
  val RTZ = build("-rminMag")
  val RDN = build("-rmin")
  val RUP = build("-rmax")
  val RMM = build("-rnear_maxMag")
  val all = List(RNE, RTZ, RDN, RUP, RMM)

  /** Destroys every generator process started by this test case. */
  def kill = all.foreach(_.kill)

  /** Selects the stream matching the requested rounding mode. */
  def apply(rounding : FpuRoundMode.E) = rounding match {
    case FpuRoundMode.RNE => RNE
    case FpuRoundMode.RTZ => RTZ
    case FpuRoundMode.RDN => RDN
    case FpuRoundMode.RUP => RUP
    case FpuRoundMode.RMM => RMM
  }
}
|
||||||
|
|
||||||
|
val f32 = new {
|
||||||
|
val add = new TestCase("f32", "add")
|
||||||
|
}
|
||||||
|
|
||||||
val cpus = for(id <- 0 until portCount) yield new {
|
val cpus = for(id <- 0 until portCount) yield new {
|
||||||
val cmdQueue = mutable.Queue[FpuCmd => Unit]()
|
val cmdQueue = mutable.Queue[FpuCmd => Unit]()
|
||||||
|
@ -96,6 +130,7 @@ class FpuTest extends FunSuite{
|
||||||
}
|
}
|
||||||
|
|
||||||
rspQueue += body
|
rspQueue += body
|
||||||
|
waitUntil(rspQueue.isEmpty)
|
||||||
}
|
}
|
||||||
|
|
||||||
def storeFloat(rs : Int)(body : Float => Unit): Unit ={
|
def storeFloat(rs : Int)(body : Float => Unit): Unit ={
|
||||||
|
@ -341,6 +376,18 @@ class FpuTest extends FunSuite{
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Issues `a + b` to the DUT with the given rounding mode and checks the stored
  * result bit-for-bit against `ref`.
  *
  * @param a        first operand, loaded into a freshly allocated register
  * @param b        second operand, loaded into a freshly allocated register
  * @param ref      expected result; compared through `f2b`, i.e. on the raw bit pattern
  * @param flag     accepted for the caller's convenience; not checked by this method
  * @param rounding rounding mode passed to the add command
  */
def testAddExact(a : Float, b : Float, ref : Float, flag : Int, rounding : FpuRoundMode.E): Unit ={
  val regs = new RegAllocator()
  // Three registers are drawn even though only two are used as operands,
  // so the allocator is exercised exactly as often as before.
  val src1 = regs.allocate()
  val src2 = regs.allocate()
  val spare = regs.allocate()
  val dst = Random.nextInt(32)

  load(src1, a)
  load(src2, b)
  add(dst, src1, src2, rounding)

  storeFloat(dst){v =>
    val gotBits = f2b(v)
    val refBits = f2b(ref)
    assert(gotBits == refBits, f"## ${a} + $b = $v, $ref $rounding")
  }
}
|
||||||
|
|
||||||
def testLoadStore(a : Float): Unit ={
|
def testLoadStore(a : Float): Unit ={
|
||||||
val rd = Random.nextInt(32)
|
val rd = Random.nextInt(32)
|
||||||
load(rd, a)
|
load(rd, a)
|
||||||
|
@ -560,16 +607,57 @@ class FpuTest extends FunSuite{
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// roundingModes.foreach(rounding => println(Clib.math.addF32(0.0f, 0.0f, rounding.position)))
|
||||||
|
// roundingModes.foreach(rounding => println(Clib.math.addF32(1.0f,-1.0f, rounding.position)))
|
||||||
|
|
||||||
|
println()
|
||||||
|
println(Clib.math.addF32(8.0f, b2f(0xBf800000), 0))
|
||||||
|
println(Clib.math.addF32(8.0f, b2f(0xBf800001), 0))
|
||||||
|
println(Clib.math.addF32(8.0f, b2f(0xBf800002), 0))
|
||||||
|
println(Clib.math.addF32(8.0f, b2f(0xBf800003), 0))
|
||||||
|
println(Clib.math.addF32(8.0f, b2f(0xBf800004), 0))
|
||||||
|
println(Clib.math.addF32(8.0f, b2f(0xBf800005), 0))
|
||||||
|
println(Clib.math.addF32(8.0f, b2f(0xBf800006), 0))
|
||||||
|
println(Clib.math.addF32(8.0f, b2f(0xBf800007), 0))
|
||||||
|
println(Clib.math.addF32(8.0f, b2f(0xBf800008), 0))
|
||||||
|
|
||||||
|
testAdd(-5.3687091E8f, 16.249022f, FpuRoundMode.RNE)
|
||||||
|
testAdd(-5.3687091E8f, 16.0f, FpuRoundMode.RNE)
|
||||||
|
testAdd(-5.3687091E8f, 15.0f, FpuRoundMode.RNE)
|
||||||
|
for(i <- 0 until 20) testAdd(4.0f, b2f(0xBf800000 + i), FpuRoundMode.RNE)
|
||||||
|
for(i <- 0 until 64) testAdd(12.0f, b2f(0xBf801000 + i), FpuRoundMode.RNE)
|
||||||
|
for(i <- 0 until 64) testAdd(8.0f, b2f(0xBf801000 + i), FpuRoundMode.RNE)
|
||||||
|
for(i <- 0 until 64) testAdd(12.0f, b2f(0x3f801000 + i), FpuRoundMode.RNE)
|
||||||
|
for(i <- 0 until 64) testAdd(8.0f, b2f(0x3f801000 + i), FpuRoundMode.RNE)
|
||||||
|
for(i <- 0 until 20) testAdd(b2f(0x40800000+3), b2f(0xBf800000 + i+1), FpuRoundMode.RNE)
|
||||||
|
for(i <- 0 until 20) testAdd(8.0f, b2f(0xBf800000 + i), FpuRoundMode.RNE)
|
||||||
|
for(i <- 0 until 20) testAdd(16.0f, b2f(0xBf800000 + i), FpuRoundMode.RNE)
|
||||||
|
// testAdd(8.0f, b2f(0xBf800001), FpuRoundMode.RNE)
|
||||||
|
// testAdd(8.0f, b2f(0xBf800002), FpuRoundMode.RNE)
|
||||||
|
// testAdd(8.0f, b2f(0xBf800003), FpuRoundMode.RNE)
|
||||||
|
// testAdd(8.0f, b2f(0xBf800004), FpuRoundMode.RNE)
|
||||||
|
// testAddExact(-256.2578f,1.8905041f ,-254.36731f,0, FpuRoundMode.RNE)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
for(_ <- 0 until 1000000){
|
||||||
|
val rounding = FpuRoundMode.elements.randomPick()
|
||||||
|
val (a,b,c,f) = f32.add(rounding).f32_2
|
||||||
|
if(/*a > 0 && b < 0 && */!c.isInfinity) testAddExact(a,b,c,f, rounding)
|
||||||
|
}
|
||||||
|
|
||||||
|
waitUntil(cmdQueue.isEmpty)
|
||||||
|
dut.clockDomain.waitSampling(1000)
|
||||||
|
simSuccess()
|
||||||
|
|
||||||
//TODO test and fix a - b rounding
|
//TODO test and fix a - b rounding
|
||||||
foreachRounding(testAdd(1.0f, b2f(0x3f800001), _)) //1.00001
|
foreachRounding(testAdd(1.0f, b2f(0x3f800001), _)) //1.00001
|
||||||
foreachRounding(testAdd(4.0f, b2f(0x3f800001), _)) //1.00001
|
foreachRounding(testAdd(4.0f, b2f(0x3f800001), _)) //1.00001
|
||||||
for(_ <- 0 until 10000; a = randomFloat(); b = randomFloat()) foreachRounding(testAdd(a.abs, b.abs,_)) //TODO negative
|
for(_ <- 0 until 10000; a = randomFloat(); b = randomFloat()) foreachRounding(testAdd(a.abs, b.abs,_)) //TODO negative
|
||||||
|
|
||||||
|
|
||||||
waitUntil(cmdQueue.isEmpty)
|
|
||||||
dut.clockDomain.waitSampling(1000)
|
|
||||||
simSuccess()
|
|
||||||
|
|
||||||
testAdd(b2f(0x3f800000), b2f(0x3f800000-1))
|
testAdd(b2f(0x3f800000), b2f(0x3f800000-1))
|
||||||
testAdd(1.1f, 2.3f)
|
testAdd(1.1f, 2.3f)
|
||||||
testAdd(1.2f, -1.2f)
|
testAdd(1.2f, -1.2f)
|
||||||
|
@ -841,3 +929,39 @@ object FpuCompileSo extends App{
|
||||||
println(Clib.math.addF32(1.00000011921f, 4.0f, FpuRoundMode.RDN.position))
|
println(Clib.math.addF32(1.00000011921f, 4.0f, FpuRoundMode.RDN.position))
|
||||||
println(Clib.math.addF32(1.00000011921f, 4.0f, FpuRoundMode.RUP.position))
|
println(Clib.math.addF32(1.00000011921f, 4.0f, FpuRoundMode.RUP.position))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Runs an external command and exposes its standard output as a blocking,
  * bounded stream of lines.
  *
  * The process output is delivered on a thread owned by `scala.sys.process`,
  * while `next` is called from the consumer's thread. All accesses to `buf`
  * are therefore guarded by `buf`'s monitor, and both sides block with
  * wait/notify instead of polling with `Thread.sleep`.
  *
  * @param cmd command line handed to `scala.sys.process.Process`
  */
class ProcessStream(cmd : String){
  import sys.process._

  // Pending output lines, oldest first. Only touch while holding buf's monitor.
  val buf = mutable.Queue[() => String]()

  val p = Process(cmd).run(new ProcessLogger {
    override def out(s: => String): Unit = {
      // Force the by-name line once, on the producer thread, so the consumer
      // never evaluates it later from a different thread.
      val line = s
      buf.synchronized {
        // Back-pressure: stall the producer while the backlog is too large.
        while(buf.size > 10000) buf.wait()
        buf.enqueue(() => line)
        buf.notifyAll()
      }
    }
    // Standard error of the child process is discarded.
    override def err(s: => String): Unit = {}
    override def buffer[T](f: => T): T = f
  })

  /** Destroys the underlying process. */
  def kill = p.destroy()

  /** Returns the next output line, blocking until one is available. */
  def next = buf.synchronized {
    while(buf.isEmpty) buf.wait()
    val line = buf.dequeue()()
    // Wake a producer that may be stalled on the size limit.
    buf.notifyAll()
    line
  }
}
|
||||||
|
|
||||||
|
/** Manual smoke test: starts `testfloat_gen` and prints the lines it produces.
  * Prints five lines after one second, one more after another second, then
  * keeps printing one line every 10 ms until the program is stopped.
  */
object TestSoftFloat extends App{
  val p = new ProcessStream("testfloat_gen -forever f32_add")

  // Prints the next `count` lines of the stream, blocking on each.
  private def dump(count : Int): Unit = (0 until count).foreach(_ => println(p.next))

  Thread.sleep(1000)
  dump(5)
  Thread.sleep(1000)
  dump(1)
  // Never terminates on its own; stop the program externally.
  while(true) {
    Thread.sleep(10)
    dump(1)
  }
}
|
||||||
|
|
Loading…
Reference in New Issue