fpu moved overflow rounding to writeback
This commit is contained in:
parent
fc3e6a6d0a
commit
3c4df1e963
|
@ -624,7 +624,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
// val exp = math.exp + U(needShift)
|
// val exp = math.exp + U(needShift)
|
||||||
// val man = needShift ? math.mulC(p.internalMantissaSize + 1, p.internalMantissaSize bits) | math.mulC(p.internalMantissaSize, p.internalMantissaSize bits)
|
// val man = needShift ? math.mulC(p.internalMantissaSize + 1, p.internalMantissaSize bits) | math.mulC(p.internalMantissaSize, p.internalMantissaSize bits)
|
||||||
|
|
||||||
val mulRounded = (math.mulC >> p.internalMantissaSize) + math.mulC(p.internalMantissaSize-1).asUInt
|
val mulRounded = (math.mulC >> p.internalMantissaSize)
|
||||||
val needShift = mulRounded.msb
|
val needShift = mulRounded.msb
|
||||||
val exp = math.exp + U(needShift)
|
val exp = math.exp + U(needShift)
|
||||||
val man = needShift ? mulRounded(1, p.internalMantissaSize bits) | mulRounded(0, p.internalMantissaSize bits)
|
val man = needShift ? mulRounded(1, p.internalMantissaSize bits) | mulRounded(0, p.internalMantissaSize bits)
|
||||||
|
@ -903,7 +903,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
def xySign = shifter.xySign
|
def xySign = shifter.xySign
|
||||||
|
|
||||||
val xSigned = xMantissa.twoComplement(xSign) //TODO Is that necessary ?
|
val xSigned = xMantissa.twoComplement(xSign) //TODO Is that necessary ?
|
||||||
val overshot = (ySign && shifter.roundingScrap)
|
|
||||||
val ySigned = ((ySign ## Mux(ySign, ~yMantissa, yMantissa)).asUInt + (ySign && !shifter.roundingScrap).asUInt).asSInt //rounding here
|
val ySigned = ((ySign ## Mux(ySign, ~yMantissa, yMantissa)).asUInt + (ySign && !shifter.roundingScrap).asUInt).asSInt //rounding here
|
||||||
val xyMantissa = U(xSigned +^ ySigned).trim(1 bits)
|
val xyMantissa = U(xSigned +^ ySigned).trim(1 bits)
|
||||||
}
|
}
|
||||||
|
@ -916,11 +915,9 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val shiftOh = OHMasking.first(xyMantissa.asBools.reverse)
|
val shiftOh = OHMasking.first(xyMantissa.asBools.reverse)
|
||||||
val shift = OHToUInt(shiftOh)
|
val shift = OHToUInt(shiftOh)
|
||||||
val mantissa = (xyMantissa |<< shift)
|
val mantissa = (xyMantissa |<< shift)
|
||||||
// val mantissa = ((shifter.roundingScrap.asUInt @@ xyMantissa.reversed) |>> shift).reversed >> 1
|
|
||||||
val exponent = xyExponent -^ shift + 1
|
val exponent = xyExponent -^ shift + 1
|
||||||
xySign clearWhen(input.rs1.isZero && input.rs2.isZero)
|
val forceZero = xyMantissa === 0 || (input.rs1.isZero && input.rs2.isZero)
|
||||||
val forceZero = xyMantissa === 0 || exponent.msb || (input.rs1.isZero && input.rs2.isZero)
|
// val forceOverflow = exponent === exponentOne + 128 //Handled by writeback rounding
|
||||||
val forceOverflow = exponent === exponentOne + 128
|
|
||||||
val forceInfinity = (input.rs1.isInfinity || input.rs2.isInfinity)
|
val forceInfinity = (input.rs1.isInfinity || input.rs2.isInfinity)
|
||||||
val forceNan = input.rs1.isNan || input.rs2.isNan || (input.rs1.isInfinity && input.rs2.isInfinity && (input.rs1.sign ^ input.rs2.sign))
|
val forceNan = input.rs1.isNan || input.rs2.isNan || (input.rs1.isInfinity && input.rs2.isInfinity && (input.rs1.sign ^ input.rs2.sign))
|
||||||
}
|
}
|
||||||
|
@ -949,13 +946,13 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
}
|
}
|
||||||
} elsewhen(norm.forceInfinity) {
|
} elsewhen(norm.forceInfinity) {
|
||||||
output.value.setInfinity
|
output.value.setInfinity
|
||||||
} elsewhen(norm.forceOverflow) {
|
} /*elsewhen(norm.forceOverflow) {
|
||||||
val doMax = input.roundMode.mux(
|
val doMax = input.roundMode.mux(
|
||||||
FpuRoundMode.RNE -> (True),
|
FpuRoundMode.RNE -> (False),
|
||||||
FpuRoundMode.RTZ -> (True),
|
FpuRoundMode.RTZ -> (True),
|
||||||
FpuRoundMode.RDN -> (!output.value.sign),
|
FpuRoundMode.RDN -> (!output.value.sign),
|
||||||
FpuRoundMode.RUP -> (output.value.sign),
|
FpuRoundMode.RUP -> (output.value.sign),
|
||||||
FpuRoundMode.RMM -> (True)
|
FpuRoundMode.RMM -> (False)
|
||||||
)
|
)
|
||||||
when(doMax){
|
when(doMax){
|
||||||
output.value.exponent := exponentOne + 127
|
output.value.exponent := exponentOne + 127
|
||||||
|
@ -963,7 +960,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
} otherwise {
|
} otherwise {
|
||||||
output.value.setInfinity
|
output.value.setInfinity
|
||||||
}
|
}
|
||||||
}
|
}*/
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -992,9 +989,24 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
math.mantissa := adder(0, p.internalMantissaSize bits)
|
math.mantissa := adder(0, p.internalMantissaSize bits)
|
||||||
|
|
||||||
val patched = CombInit(math)
|
val patched = CombInit(math)
|
||||||
when(!input.value.special && math.exponent === exponentOne + 128){
|
when(!math.special && math.exponent >= exponentOne + 128){
|
||||||
|
// patched.setInfinity
|
||||||
|
val doMax = input.roundMode.mux(
|
||||||
|
FpuRoundMode.RNE -> (False),
|
||||||
|
FpuRoundMode.RTZ -> (True),
|
||||||
|
FpuRoundMode.RDN -> (!math.sign),
|
||||||
|
FpuRoundMode.RUP -> (math.sign),
|
||||||
|
FpuRoundMode.RMM -> (False)
|
||||||
|
)
|
||||||
|
when(doMax){
|
||||||
|
patched.exponent := exponentOne + 127
|
||||||
|
patched.mantissa.setAll()
|
||||||
|
} otherwise {
|
||||||
patched.setInfinity
|
patched.setInfinity
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
val output = input.swapPayload(RoundOutput())
|
val output = input.swapPayload(RoundOutput())
|
||||||
output.source := input.source
|
output.source := input.source
|
||||||
|
|
|
@ -69,6 +69,7 @@ class FpuTest extends FunSuite{
|
||||||
|
|
||||||
val f32 = new {
|
val f32 = new {
|
||||||
val add = new TestCase("f32", "add")
|
val add = new TestCase("f32", "add")
|
||||||
|
val mul = new TestCase("f32", "mul")
|
||||||
}
|
}
|
||||||
|
|
||||||
val cpus = for(id <- 0 until portCount) yield new {
|
val cpus = for(id <- 0 until portCount) yield new {
|
||||||
|
@ -137,7 +138,7 @@ class FpuTest extends FunSuite{
|
||||||
storeRaw(rs){rsp => body(b2f(rsp.value.toLong.toInt))}
|
storeRaw(rs){rsp => body(b2f(rsp.value.toLong.toInt))}
|
||||||
}
|
}
|
||||||
|
|
||||||
def mul(rd : Int, rs1 : Int, rs2 : Int): Unit ={
|
def mul(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E = FpuRoundMode.RNE): Unit ={
|
||||||
cmdQueue += {cmd =>
|
cmdQueue += {cmd =>
|
||||||
cmd.opcode #= cmd.opcode.spinalEnum.MUL
|
cmd.opcode #= cmd.opcode.spinalEnum.MUL
|
||||||
cmd.rs1 #= rs1
|
cmd.rs1 #= rs1
|
||||||
|
@ -145,6 +146,7 @@ class FpuTest extends FunSuite{
|
||||||
cmd.rs3.randomize()
|
cmd.rs3.randomize()
|
||||||
cmd.rd #= rd
|
cmd.rd #= rd
|
||||||
cmd.arg #= 0
|
cmd.arg #= 0
|
||||||
|
cmd.roundMode #= rounding
|
||||||
}
|
}
|
||||||
commitQueue += {cmd =>
|
commitQueue += {cmd =>
|
||||||
cmd.write #= true
|
cmd.write #= true
|
||||||
|
@ -388,6 +390,19 @@ class FpuTest extends FunSuite{
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def testMulExact(a : Float, b : Float, ref : Float, flag : Int, rounding : FpuRoundMode.E): Unit ={
|
||||||
|
val rs = new RegAllocator()
|
||||||
|
val rs1, rs2, rs3 = rs.allocate()
|
||||||
|
val rd = Random.nextInt(32)
|
||||||
|
load(rs1, a)
|
||||||
|
load(rs2, b)
|
||||||
|
mul(rd,rs1,rs2, rounding)
|
||||||
|
storeFloat(rd){v =>
|
||||||
|
assert(f2b(v) == f2b(ref), f"## ${a} * $b = $v, $ref $rounding")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
def testLoadStore(a : Float): Unit ={
|
def testLoadStore(a : Float): Unit ={
|
||||||
val rd = Random.nextInt(32)
|
val rd = Random.nextInt(32)
|
||||||
load(rd, a)
|
load(rd, a)
|
||||||
|
@ -418,6 +433,7 @@ class FpuTest extends FunSuite{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def testFma(a : Float, b : Float, c : Float): Unit ={
|
def testFma(a : Float, b : Float, c : Float): Unit ={
|
||||||
val rs = new RegAllocator()
|
val rs = new RegAllocator()
|
||||||
val rs1, rs2, rs3 = rs.allocate()
|
val rs1, rs2, rs3 = rs.allocate()
|
||||||
|
@ -609,19 +625,34 @@ class FpuTest extends FunSuite{
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// for(_ <- 0 until 1000000){
|
||||||
|
// val rounding = FpuRoundMode.RTZ
|
||||||
|
// val (a,b,c,f) = f32.mul(rounding).f32_2
|
||||||
|
// if(a > 0 && b > 0 && !c.isInfinity) testMulExact(a,b,c,f, rounding)
|
||||||
|
// }
|
||||||
|
|
||||||
|
|
||||||
// roundingModes.foreach(rounding => println(Clib.math.addF32(0.0f, 0.0f, rounding.position)))
|
// roundingModes.foreach(rounding => println(Clib.math.addF32(0.0f, 0.0f, rounding.position)))
|
||||||
// roundingModes.foreach(rounding => println(Clib.math.addF32(1.0f,-1.0f, rounding.position)))
|
// roundingModes.foreach(rounding => println(Clib.math.addF32(1.0f,-1.0f, rounding.position)))
|
||||||
|
|
||||||
println()
|
println("Mul done")
|
||||||
println(Clib.math.addF32(8.0f, b2f(0xBf800000), 0))
|
|
||||||
println(Clib.math.addF32(8.0f, b2f(0xBf800001), 0))
|
for(i <- 0 until 20) println(Clib.math.addF32(b2f(0x7f000000), b2f(0x7f000000-10+i), 0))
|
||||||
println(Clib.math.addF32(8.0f, b2f(0xBf800002), 0))
|
// simSuccess()
|
||||||
println(Clib.math.addF32(8.0f, b2f(0xBf800003), 0))
|
|
||||||
println(Clib.math.addF32(8.0f, b2f(0xBf800004), 0))
|
foreachRounding(r => println(Clib.math.addF32(b2f(0x7f7fffff), b2f(0x7f7fffff),r.position)))
|
||||||
println(Clib.math.addF32(8.0f, b2f(0xBf800005), 0))
|
println("")
|
||||||
println(Clib.math.addF32(8.0f, b2f(0xBf800006), 0))
|
foreachRounding(r => println(Clib.math.addF32(2.5787021E38f, 3.4027196E38f,r.position)))
|
||||||
println(Clib.math.addF32(8.0f, b2f(0xBf800007), 0))
|
println("")
|
||||||
println(Clib.math.addF32(8.0f, b2f(0xBf800008), 0))
|
// println(Clib.math.addF32(8.0f, b2f(0xBf800000), 0))
|
||||||
|
// println(Clib.math.addF32(8.0f, b2f(0xBf800001), 0))
|
||||||
|
// println(Clib.math.addF32(8.0f, b2f(0xBf800002), 0))
|
||||||
|
// println(Clib.math.addF32(8.0f, b2f(0xBf800003), 0))
|
||||||
|
// println(Clib.math.addF32(8.0f, b2f(0xBf800004), 0))
|
||||||
|
// println(Clib.math.addF32(8.0f, b2f(0xBf800005), 0))
|
||||||
|
// println(Clib.math.addF32(8.0f, b2f(0xBf800006), 0))
|
||||||
|
// println(Clib.math.addF32(8.0f, b2f(0xBf800007), 0))
|
||||||
|
// println(Clib.math.addF32(8.0f, b2f(0xBf800008), 0))
|
||||||
|
|
||||||
testAdd(-5.3687091E8f, 16.249022f, FpuRoundMode.RNE)
|
testAdd(-5.3687091E8f, 16.249022f, FpuRoundMode.RNE)
|
||||||
testAdd(-5.3687091E8f, 16.0f, FpuRoundMode.RNE)
|
testAdd(-5.3687091E8f, 16.0f, FpuRoundMode.RNE)
|
||||||
|
@ -645,7 +676,13 @@ class FpuTest extends FunSuite{
|
||||||
for(_ <- 0 until 1000000){
|
for(_ <- 0 until 1000000){
|
||||||
val rounding = FpuRoundMode.elements.randomPick()
|
val rounding = FpuRoundMode.elements.randomPick()
|
||||||
val (a,b,c,f) = f32.add(rounding).f32_2
|
val (a,b,c,f) = f32.add(rounding).f32_2
|
||||||
if(/*a > 0 && b < 0 && */!c.isInfinity) testAddExact(a,b,c,f, rounding)
|
// if(a.isNaN) println("Nan")
|
||||||
|
// if(b.isNaN) println("Nan")
|
||||||
|
// if(a.isInfinity) println("Inf")
|
||||||
|
// if(b.isInfinity) println("Inf")
|
||||||
|
// if(a == 0f) println("Zero")
|
||||||
|
// if(b == 0f) println("Zero")
|
||||||
|
/*if(/*a > 0 && b < 0 && */!c.isInfinity) */testAddExact(a,b,c,f, rounding)
|
||||||
}
|
}
|
||||||
|
|
||||||
waitUntil(cmdQueue.isEmpty)
|
waitUntil(cmdQueue.isEmpty)
|
||||||
|
@ -924,10 +961,17 @@ object Clib {
|
||||||
|
|
||||||
object FpuCompileSo extends App{
|
object FpuCompileSo extends App{
|
||||||
|
|
||||||
println(Clib.math.addF32(1.00000011921f, 4.0f, FpuRoundMode.RNE.position))
|
val b2f = lang.Float.intBitsToFloat(_)
|
||||||
println(Clib.math.addF32(1.00000011921f, 4.0f, FpuRoundMode.RTZ.position))
|
for(e <- FpuRoundMode.elements) {
|
||||||
println(Clib.math.addF32(1.00000011921f, 4.0f, FpuRoundMode.RDN.position))
|
println(e)
|
||||||
println(Clib.math.addF32(1.00000011921f, 4.0f, FpuRoundMode.RUP.position))
|
for (i <- -2 until 50) println(i + " => " + Clib.math.addF32(b2f(0x7f000000), b2f(0x7f000000 + i), e.position))
|
||||||
|
println("")
|
||||||
|
}
|
||||||
|
|
||||||
|
// println(Clib.math.addF32(1.00000011921f, 4.0f, FpuRoundMode.RNE.position))
|
||||||
|
// println(Clib.math.addF32(1.00000011921f, 4.0f, FpuRoundMode.RTZ.position))
|
||||||
|
// println(Clib.math.addF32(1.00000011921f, 4.0f, FpuRoundMode.RDN.position))
|
||||||
|
// println(Clib.math.addF32(1.00000011921f, 4.0f, FpuRoundMode.RUP.position))
|
||||||
}
|
}
|
||||||
|
|
||||||
class ProcessStream(cmd : String){
|
class ProcessStream(cmd : String){
|
||||||
|
|
Loading…
Reference in New Issue