fpu added proper rounding for add (need to manage subtraction)
This commit is contained in:
parent
195e4c422d
commit
1ae84ea83b
|
@ -30,6 +30,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val rd = p.rfAddress()
|
val rd = p.rfAddress()
|
||||||
val value = Bits(32 bits)
|
val value = Bits(32 bits)
|
||||||
val arg = p.Arg()
|
val arg = p.Arg()
|
||||||
|
val roundMode = FpuRoundMode()
|
||||||
}
|
}
|
||||||
|
|
||||||
case class RfReadOutput() extends Bundle{
|
case class RfReadOutput() extends Bundle{
|
||||||
|
@ -40,6 +41,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val rd = p.rfAddress()
|
val rd = p.rfAddress()
|
||||||
val value = Bits(32 bits)
|
val value = Bits(32 bits)
|
||||||
val arg = p.Arg()
|
val arg = p.Arg()
|
||||||
|
val roundMode = FpuRoundMode()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -49,6 +51,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val lockId = lockIdType()
|
val lockId = lockIdType()
|
||||||
val i2f = Bool()
|
val i2f = Bool()
|
||||||
val arg = Bits(2 bits)
|
val arg = Bits(2 bits)
|
||||||
|
val roundMode = FpuRoundMode()
|
||||||
}
|
}
|
||||||
|
|
||||||
case class ShortPipInput() extends Bundle{
|
case class ShortPipInput() extends Bundle{
|
||||||
|
@ -61,6 +64,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val value = Bits(32 bits)
|
val value = Bits(32 bits)
|
||||||
val arg = Bits(2 bits)
|
val arg = Bits(2 bits)
|
||||||
def rs1 = rs1Raw.as(p.internalFloating)
|
def rs1 = rs1Raw.as(p.internalFloating)
|
||||||
|
val roundMode = FpuRoundMode()
|
||||||
}
|
}
|
||||||
|
|
||||||
case class MulInput() extends Bundle{
|
case class MulInput() extends Bundle{
|
||||||
|
@ -71,6 +75,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val add = Bool()
|
val add = Bool()
|
||||||
val divSqrt = Bool()
|
val divSqrt = Bool()
|
||||||
val msb1, msb2 = Bool() //allow usage of msb bits of mul
|
val msb1, msb2 = Bool() //allow usage of msb bits of mul
|
||||||
|
val roundMode = FpuRoundMode()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -80,6 +85,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val rd = p.rfAddress()
|
val rd = p.rfAddress()
|
||||||
val lockId = lockIdType()
|
val lockId = lockIdType()
|
||||||
val div = Bool()
|
val div = Bool()
|
||||||
|
val roundMode = FpuRoundMode()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -88,16 +94,26 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val rs1, rs2 = p.internalFloating()
|
val rs1, rs2 = p.internalFloating()
|
||||||
val rd = p.rfAddress()
|
val rd = p.rfAddress()
|
||||||
val lockId = lockIdType()
|
val lockId = lockIdType()
|
||||||
|
val roundMode = FpuRoundMode()
|
||||||
}
|
}
|
||||||
|
|
||||||
case class WriteInput() extends Bundle{
|
|
||||||
|
/** Result payload that the execution units hand to the merge arbiter, still unrounded. */
case class MergeInput() extends Bundle {
  // Originating port; later concatenated with `rd` to form the register-file write address.
  val source    = Source()
  // Index of the register-file lock entry associated with this result.
  val lockId    = lockIdType()
  // Destination register inside the port's register bank.
  val rd        = p.rfAddress()
  // Unrounded value in the internal floating-point format.
  val value     = p.internalFloating()
  // Two extra bits below the mantissa LSB, consumed by the round stage
  // (the adder ORs its shifted-out "scrap" into bit 0).
  val round     = UInt(2 bits)
  // Rounding mode to apply to `value`.
  val roundMode = FpuRoundMode()
}
|
||||||
|
|
||||||
|
case class RoundOutput() extends Bundle{
|
||||||
val source = Source()
|
val source = Source()
|
||||||
val lockId = lockIdType()
|
val lockId = lockIdType()
|
||||||
val rd = p.rfAddress()
|
val rd = p.rfAddress()
|
||||||
val value = p.internalFloating()
|
val value = p.internalFloating()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
val rf = new Area{
|
val rf = new Area{
|
||||||
val ram = Mem(p.internalFloating, 32*portCount)
|
val ram = Mem(p.internalFloating, 32*portCount)
|
||||||
val lock = for(i <- 0 until rfLockCount) yield new Area{
|
val lock = for(i <- 0 until rfLockCount) yield new Area{
|
||||||
|
@ -222,6 +238,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
output.lockId := s1LockId
|
output.lockId := s1LockId
|
||||||
output.value := s1.value
|
output.value := s1.value
|
||||||
output.arg := s1.arg
|
output.arg := s1.arg
|
||||||
|
output.roundMode := s1.roundMode
|
||||||
output.rd := s1.rd
|
output.rd := s1.rd
|
||||||
output.rs1 := rf.ram.readSync(s0.source @@ s0.rs1,enable = !output.isStall)
|
output.rs1 := rf.ram.readSync(s0.source @@ s0.rs1,enable = !output.isStall)
|
||||||
output.rs2 := rf.ram.readSync(s0.source @@ s0.rs2,enable = !output.isStall)
|
output.rs2 := rf.ram.readSync(s0.source @@ s0.rs2,enable = !output.isStall)
|
||||||
|
@ -298,6 +315,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val value = p.storeLoadType()
|
val value = p.storeLoadType()
|
||||||
val i2f = Bool()
|
val i2f = Bool()
|
||||||
val arg = Bits(2 bits)
|
val arg = Bits(2 bits)
|
||||||
|
val roundMode = FpuRoundMode()
|
||||||
}
|
}
|
||||||
|
|
||||||
val s0 = new Area{
|
val s0 = new Area{
|
||||||
|
@ -315,6 +333,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
output.value := feed.value
|
output.value := feed.value
|
||||||
output.i2f := input.i2f
|
output.i2f := input.i2f
|
||||||
output.arg := input.arg
|
output.arg := input.arg
|
||||||
|
output.roundMode := input.roundMode
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -406,17 +425,20 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
when(isInfinity){recoded.setInfinity}
|
when(isInfinity){recoded.setInfinity}
|
||||||
when(isNan){recoded.setNan}
|
when(isNan){recoded.setNan}
|
||||||
|
|
||||||
val output = input.haltWhen(busy).swapPayload(WriteInput())
|
val output = input.haltWhen(busy).swapPayload(MergeInput())
|
||||||
output.source := input.source
|
output.source := input.source
|
||||||
output.lockId := input.lockId
|
output.lockId := input.lockId
|
||||||
|
output.roundMode := input.roundMode
|
||||||
output.rd := input.rd
|
output.rd := input.rd
|
||||||
output.value := recoded
|
output.value := recoded
|
||||||
|
output.round := 0
|
||||||
when(input.i2f){
|
when(input.i2f){
|
||||||
output.value.sign := i2fSign
|
output.value.sign := i2fSign
|
||||||
output.value.exponent := (U(exponentOne+31) - fsm.manTop).resized
|
output.value.exponent := (U(exponentOne+31) - fsm.manTop).resized
|
||||||
output.value.mantissa := U(i2fShifted)
|
output.value.mantissa := U(i2fShifted)
|
||||||
output.value.setNormal
|
output.value.setNormal
|
||||||
when(fsm.i2fZero) { output.value.setZero }
|
when(fsm.i2fZero) { output.value.setZero }
|
||||||
|
//TODO ROUND
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -424,7 +446,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val shortPip = new Area{
|
val shortPip = new Area{
|
||||||
val input = decode.shortPip.stage()
|
val input = decode.shortPip.stage()
|
||||||
|
|
||||||
val rfOutput = Stream(WriteInput())
|
val rfOutput = Stream(MergeInput())
|
||||||
|
|
||||||
val result = p.storeLoadType().assignDontCare()
|
val result = p.storeLoadType().assignDontCare()
|
||||||
|
|
||||||
|
@ -563,6 +585,8 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
rfOutput.source := input.source
|
rfOutput.source := input.source
|
||||||
rfOutput.lockId := input.lockId
|
rfOutput.lockId := input.lockId
|
||||||
rfOutput.rd := input.rd
|
rfOutput.rd := input.rd
|
||||||
|
rfOutput.roundMode := input.roundMode
|
||||||
|
rfOutput.round := 0 //TODO
|
||||||
rfOutput.value.assignDontCare()
|
rfOutput.value.assignDontCare()
|
||||||
switch(input.opcode){
|
switch(input.opcode){
|
||||||
is(FpuOpcode.MIN_MAX){
|
is(FpuOpcode.MIN_MAX){
|
||||||
|
@ -634,11 +658,13 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
output.payload := math.mulC(p.internalMantissaSize, p.internalMantissaSize+1 bits)
|
output.payload := math.mulC(p.internalMantissaSize, p.internalMantissaSize+1 bits)
|
||||||
}
|
}
|
||||||
|
|
||||||
val output = Stream(WriteInput())
|
val output = Stream(MergeInput())
|
||||||
output.valid := input.valid && !input.add && !input.divSqrt
|
output.valid := input.valid && !input.add && !input.divSqrt
|
||||||
output.source := input.source
|
output.source := input.source
|
||||||
output.lockId := input.lockId
|
output.lockId := input.lockId
|
||||||
output.rd := input.rd
|
output.rd := input.rd
|
||||||
|
output.roundMode := input.roundMode
|
||||||
|
output.round := 0 //TODO
|
||||||
output.value := norm.output
|
output.value := norm.output
|
||||||
|
|
||||||
decode.mulToAdd.valid := input.valid && input.add
|
decode.mulToAdd.valid := input.valid && input.add
|
||||||
|
@ -650,6 +676,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
decode.mulToAdd.rs2 := input.rs3
|
decode.mulToAdd.rs2 := input.rs3
|
||||||
decode.mulToAdd.rd := input.rd
|
decode.mulToAdd.rd := input.rd
|
||||||
decode.mulToAdd.lockId := input.lockId
|
decode.mulToAdd.lockId := input.lockId
|
||||||
|
decode.mulToAdd.roundMode := input.roundMode
|
||||||
|
|
||||||
input.ready := (input.add ? decode.mulToAdd.ready | output.ready) || input.divSqrt
|
input.ready := (input.add ? decode.mulToAdd.ready | output.ready) || input.divSqrt
|
||||||
}
|
}
|
||||||
|
@ -681,6 +708,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
decode.divSqrtToMul.msb2 := True
|
decode.divSqrtToMul.msb2 := True
|
||||||
decode.divSqrtToMul.rs1.special := False //TODO
|
decode.divSqrtToMul.rs1.special := False //TODO
|
||||||
decode.divSqrtToMul.rs2.special := False
|
decode.divSqrtToMul.rs2.special := False
|
||||||
|
decode.divSqrtToMul.roundMode := input.roundMode
|
||||||
|
|
||||||
|
|
||||||
val aprox = new Area {
|
val aprox = new Area {
|
||||||
|
@ -845,7 +873,8 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val rs1MantissaBigger = input.rs1.mantissa > input.rs2.mantissa
|
val rs1MantissaBigger = input.rs1.mantissa > input.rs2.mantissa
|
||||||
val absRs1Bigger = ((rs1ExponentBigger || rs1ExponentEqual && rs1MantissaBigger) && !input.rs1.isZero || input.rs1.isInfinity) && !input.rs2.isInfinity
|
val absRs1Bigger = ((rs1ExponentBigger || rs1ExponentEqual && rs1MantissaBigger) && !input.rs1.isZero || input.rs1.isInfinity) && !input.rs2.isInfinity
|
||||||
val shiftBy = rs1ExponentBigger ? (0-exp21) | exp21
|
val shiftBy = rs1ExponentBigger ? (0-exp21) | exp21
|
||||||
val passThrough = shiftBy >= p.internalMantissaSize || (input.rs1.isZero) || (input.rs2.isZero)
|
val shiftOverflow = shiftBy >= p.internalMantissaSize
|
||||||
|
val passThrough = shiftOverflow || (input.rs1.isZero) || (input.rs2.isZero)
|
||||||
|
|
||||||
//Note that rs1ExponentBigger can be replaced by absRs1Bigger bellow to avoid xsigned two complement in math block at expense of combinatorial path
|
//Note that rs1ExponentBigger can be replaced by absRs1Bigger bellow to avoid xsigned two complement in math block at expense of combinatorial path
|
||||||
val xySign = absRs1Bigger ? input.rs1.sign | input.rs2.sign
|
val xySign = absRs1Bigger ? input.rs1.sign | input.rs2.sign
|
||||||
|
@ -853,7 +882,14 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val ySign = xySign ^ (rs1ExponentBigger ? input.rs2.sign | input.rs1.sign)
|
val ySign = xySign ^ (rs1ExponentBigger ? input.rs2.sign | input.rs1.sign)
|
||||||
val xMantissa = U"1" @@ (rs1ExponentBigger ? input.rs1.mantissa | input.rs2.mantissa) @@ U"0"
|
val xMantissa = U"1" @@ (rs1ExponentBigger ? input.rs1.mantissa | input.rs2.mantissa) @@ U"0"
|
||||||
val yMantissaUnshifted = U"1" @@ (rs1ExponentBigger ? input.rs2.mantissa | input.rs1.mantissa) @@ U"0"
|
val yMantissaUnshifted = U"1" @@ (rs1ExponentBigger ? input.rs2.mantissa | input.rs1.mantissa) @@ U"0"
|
||||||
val yMantissa = yMantissaUnshifted >> (passThrough.asUInt @@ shiftBy.resize(log2Up(p.internalMantissaSize)))
|
var yMantissa = yMantissaUnshifted
|
||||||
|
val roundingScrap = CombInit(shiftOverflow)
|
||||||
|
for(i <- 0 until log2Up(p.internalMantissaSize)){
|
||||||
|
roundingScrap setWhen(shiftBy(i) && yMantissa(0, 1 << i bits) =/= 0)
|
||||||
|
yMantissa \= shiftBy(i) ? (yMantissa |>> (BigInt(1) << i)) | yMantissa
|
||||||
|
}
|
||||||
|
when(passThrough) { yMantissa := 0 }
|
||||||
|
// val yMantissa = yMantissaUnshifted >> (passThrough.asUInt @@ shiftBy.resize(log2Up(p.internalMantissaSize))) //Maybe passThrough.asUInt @@ do not infer small logic
|
||||||
val xyExponent = rs1ExponentBigger ? input.rs1.exponent | input.rs2.exponent
|
val xyExponent = rs1ExponentBigger ? input.rs1.exponent | input.rs2.exponent
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -866,9 +902,9 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
def xySign = shifter.xySign
|
def xySign = shifter.xySign
|
||||||
|
|
||||||
val xSigned = xMantissa.twoComplement(xSign)
|
val xSigned = xMantissa.twoComplement(xSign)
|
||||||
// val ySigned = (yMantissa +^ (yMantissa.lsb && !ySign).asUInt).twoComplement(ySign)
|
val ySigned = yMantissa.twoComplement(ySign)
|
||||||
val ySigned = ((ySign ## Mux(ySign, ~yMantissa, yMantissa)).asUInt +^ (ySign || yMantissa.lsb).asUInt).asSInt //rounding here
|
// val ySigned = ((ySign ## Mux(ySign, ~yMantissa, yMantissa)).asUInt +^ (ySign || yMantissa.lsb).asUInt).asSInt //rounding here
|
||||||
val xyMantissa = U(xSigned + ySigned).trim(1 bits)
|
val xyMantissa = U(xSigned +^ ySigned).trim(1 bits)
|
||||||
}
|
}
|
||||||
|
|
||||||
val norm = new Area{
|
val norm = new Area{
|
||||||
|
@ -878,9 +914,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
|
|
||||||
val shiftOh = OHMasking.first(xyMantissa.asBools.reverse)
|
val shiftOh = OHMasking.first(xyMantissa.asBools.reverse)
|
||||||
val shift = OHToUInt(shiftOh)
|
val shift = OHToUInt(shiftOh)
|
||||||
val mantissa = (xyMantissa |<< shift) >> 2
|
val mantissa = (xyMantissa |<< shift)
|
||||||
// val mantissaShifted = (xyMantissa |<< shift)
|
|
||||||
// val mantissa = ((xyMantissa ) >> 2) + U(xyMantissa(1))
|
|
||||||
val exponent = xyExponent -^ shift + 1
|
val exponent = xyExponent -^ shift + 1
|
||||||
xySign clearWhen(input.rs1.isZero && input.rs2.isZero)
|
xySign clearWhen(input.rs1.isZero && input.rs2.isZero)
|
||||||
val forceZero = xyMantissa === 0 || exponent.msb || (input.rs1.isZero && input.rs2.isZero)
|
val forceZero = xyMantissa === 0 || exponent.msb || (input.rs1.isZero && input.rs2.isZero)
|
||||||
|
@ -889,14 +923,16 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
val output = input.swapPayload(WriteInput())
|
val output = input.swapPayload(MergeInput())
|
||||||
output.source := input.source
|
output.source := input.source
|
||||||
output.lockId := input.lockId
|
output.lockId := input.lockId
|
||||||
output.rd := input.rd
|
output.rd := input.rd
|
||||||
output.value.sign := norm.xySign
|
output.value.sign := norm.xySign
|
||||||
output.value.mantissa := norm.mantissa.resized
|
output.value.mantissa := (norm.mantissa >> 2).resized
|
||||||
output.value.exponent := norm.exponent.resized
|
output.value.exponent := norm.exponent.resized
|
||||||
output.value.special := False
|
output.value.special := False
|
||||||
|
output.roundMode := input.roundMode
|
||||||
|
output.round := norm.mantissa(1 downto 0) | (U"0" @@ shifter.roundingScrap)
|
||||||
|
|
||||||
when(norm.forceNan) {
|
when(norm.forceNan) {
|
||||||
output.value.setNanQuiet
|
output.value.setNanQuiet
|
||||||
|
@ -911,25 +947,59 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
val write = new Area{
|
val merge = new Area {
|
||||||
val arbitrated = StreamArbiterFactory.lowerFirst.noLock.on(List(load.s1.output, add.output, mul.output, shortPip.rfOutput))
|
val arbitrated = StreamArbiterFactory.lowerFirst.noLock.on(List(load.s1.output, add.output, mul.output, shortPip.rfOutput))
|
||||||
val isCommited = rf.lock.map(_.commited).read(arbitrated.lockId)
|
val isCommited = rf.lock.map(_.commited).read(arbitrated.lockId)
|
||||||
val commited = arbitrated.haltWhen(!isCommited).toFlow
|
val commited = arbitrated.haltWhen(!isCommited).toFlow
|
||||||
|
}
|
||||||
|
|
||||||
for(i <- 0 until portCount){
|
val round = new Area{
|
||||||
completion(i).increments += (RegNext(commited.fire && commited.source === i) init(False))
|
val input = merge.commited.combStage
|
||||||
|
|
||||||
|
val mantissaIncrement = !input.value.special && input.roundMode.mux(
|
||||||
|
FpuRoundMode.RNE -> (input.round(1) && (input.round(0) || input.value.mantissa.lsb)),
|
||||||
|
FpuRoundMode.RTZ -> False,
|
||||||
|
FpuRoundMode.RDN -> (input.round =/= 0 && input.value.sign),
|
||||||
|
FpuRoundMode.RUP -> (input.round =/= 0 && !input.value.sign),
|
||||||
|
FpuRoundMode.RMM -> (input.round(1))
|
||||||
|
)
|
||||||
|
|
||||||
|
val math = p.internalFloating()
|
||||||
|
val adder = (input.value.exponent @@ input.value.mantissa) + U(mantissaIncrement)
|
||||||
|
math.special := input.value.special
|
||||||
|
math.sign := input.value.sign
|
||||||
|
math.exponent := adder(p.internalMantissaSize, p.internalExponentSize bits)
|
||||||
|
math.mantissa := adder(0, p.internalMantissaSize bits)
|
||||||
|
|
||||||
|
val patched = CombInit(math)
|
||||||
|
when(!input.value.special && math.exponent === exponentOne + 128){
|
||||||
|
patched.setInfinity
|
||||||
}
|
}
|
||||||
|
|
||||||
when(commited.valid){
|
val output = input.swapPayload(RoundOutput())
|
||||||
for(i <- 0 until rfLockCount) when(commited.lockId === i){
|
output.source := input.source
|
||||||
|
output.lockId := input.lockId
|
||||||
|
output.rd := input.rd
|
||||||
|
output.value := patched
|
||||||
|
}
|
||||||
|
|
||||||
|
val writeback = new Area{
|
||||||
|
val input = round.output.combStage
|
||||||
|
|
||||||
|
for(i <- 0 until portCount){
|
||||||
|
completion(i).increments += (RegNext(input.fire && input.source === i) init(False))
|
||||||
|
}
|
||||||
|
|
||||||
|
when(input.valid){
|
||||||
|
for(i <- 0 until rfLockCount) when(input.lockId === i){
|
||||||
rf.lock(i).valid := False
|
rf.lock(i).valid := False
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
val port = rf.ram.writePort
|
val port = rf.ram.writePort
|
||||||
port.valid := commited.valid && rf.lock.map(_.write).read(commited.lockId)
|
port.valid := input.valid && rf.lock.map(_.write).read(input.lockId)
|
||||||
port.address := commited.source @@ commited.rd
|
port.address := input.source @@ input.rd
|
||||||
port.data := commited.value
|
port.data := input.value
|
||||||
|
|
||||||
when(port.valid){
|
when(port.valid){
|
||||||
assert(!(port.data.exponent === 0 && !port.data.special), "Special violation")
|
assert(!(port.data.exponent === 0 && !port.data.special), "Special violation")
|
||||||
|
|
|
@ -89,6 +89,21 @@ object FpuFormat extends SpinalEnum{
|
||||||
val FLOAT, DOUBLE = newElement()
|
val FLOAT, DOUBLE = newElement()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Rounding modes handled by the FPU core.
 *
 * Encoded binary-sequentially, so the declaration order below defines the
 * numeric value of each element (RNE = 0 ... RMM = 4). Do not reorder.
 */
object FpuRoundMode extends SpinalEnum(defaultEncoding = binarySequential) {
  val RNE = newElement() // round to nearest, ties to even
  val RTZ = newElement() // round toward zero (never increments the mantissa)
  val RDN = newElement() // round down: inexact negative results are incremented in magnitude
  val RUP = newElement() // round up: inexact positive results are incremented in magnitude
  val RMM = newElement() // round to nearest, ties away from zero
}
|
||||||
|
/**
 * Rounding-mode selector with an explicit, non-contiguous encoding.
 *
 * RNE..RMM take the values 0..4 and DYN takes 7; 5 and 6 are left unassigned.
 * NOTE(review): presumably mirrors the instruction's 3-bit rounding field,
 * with DYN meaning "use the CSR rounding mode" - confirm against the plugin.
 */
object FpuRoundModeInstr extends SpinalEnum() {
  val RNE, RTZ, RDN, RUP, RMM, DYN = newElement()

  // Static encoding table; the name "opt" is the encoding's identifier.
  defaultEncoding = SpinalEnumEncoding("opt")(
    RNE -> 0, RTZ -> 1, RDN -> 2, RUP -> 3, RMM -> 4,
    DYN -> 7
  )
}
|
||||||
|
|
||||||
|
|
||||||
case class FpuParameter( internalMantissaSize : Int,
|
case class FpuParameter( internalMantissaSize : Int,
|
||||||
withDouble : Boolean){
|
withDouble : Boolean){
|
||||||
|
@ -120,6 +135,7 @@ case class FpuCmd(p : FpuParameter) extends Bundle{
|
||||||
val rs1, rs2, rs3 = p.rfAddress()
|
val rs1, rs2, rs3 = p.rfAddress()
|
||||||
val rd = p.rfAddress()
|
val rd = p.rfAddress()
|
||||||
val format = p.Format()
|
val format = p.Format()
|
||||||
|
val roundMode = FpuRoundMode()
|
||||||
}
|
}
|
||||||
|
|
||||||
case class FpuCommit(p : FpuParameter) extends Bundle{
|
case class FpuCommit(p : FpuParameter) extends Bundle{
|
||||||
|
|
|
@ -166,14 +166,18 @@ class FpuPlugin(externalFpu : Boolean = false,
|
||||||
arbitration.haltItself setWhen(arbitration.isValid && input(FPU_ENABLE) && hazard)
|
arbitration.haltItself setWhen(arbitration.isValid && input(FPU_ENABLE) && hazard)
|
||||||
arbitration.haltItself setWhen(port.cmd.isStall)
|
arbitration.haltItself setWhen(port.cmd.isStall)
|
||||||
|
|
||||||
port.cmd.valid := arbitration.isValid && input(FPU_ENABLE) && !forked && !hazard
|
val iRoundMode = input(INSTRUCTION)(funct3Range)
|
||||||
port.cmd.opcode := input(FPU_OPCODE)
|
val roundMode = (input(INSTRUCTION)(funct3Range) === B"111") ? csr.rm | input(INSTRUCTION)(funct3Range)
|
||||||
port.cmd.arg := input(FPU_ARG)
|
|
||||||
port.cmd.rs1 := ((input(FPU_OPCODE) === FpuOpcode.STORE) ? input(INSTRUCTION)(rs2Range).asUInt | input(INSTRUCTION)(rs1Range).asUInt)
|
port.cmd.valid := arbitration.isValid && input(FPU_ENABLE) && !forked && !hazard
|
||||||
port.cmd.rs2 := input(INSTRUCTION)(rs2Range).asUInt
|
port.cmd.opcode := input(FPU_OPCODE)
|
||||||
port.cmd.rs3 := input(INSTRUCTION)(rs3Range).asUInt
|
port.cmd.arg := input(FPU_ARG)
|
||||||
port.cmd.rd := input(INSTRUCTION)(rdRange).asUInt
|
port.cmd.rs1 := ((input(FPU_OPCODE) === FpuOpcode.STORE) ? input(INSTRUCTION)(rs2Range).asUInt | input(INSTRUCTION)(rs1Range).asUInt)
|
||||||
port.cmd.format := FpuFormat.FLOAT
|
port.cmd.rs2 := input(INSTRUCTION)(rs2Range).asUInt
|
||||||
|
port.cmd.rs3 := input(INSTRUCTION)(rs3Range).asUInt
|
||||||
|
port.cmd.rd := input(INSTRUCTION)(rdRange).asUInt
|
||||||
|
port.cmd.format := FpuFormat.FLOAT
|
||||||
|
port.cmd.roundMode := roundMode.as(FpuRoundMode())
|
||||||
|
|
||||||
insert(FPU_FORKED) := forked || port.cmd.fire
|
insert(FPU_FORKED) := forked || port.cmd.fire
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,50 @@
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <jni.h>
|
||||||
|
#include <softfloat.h>
|
||||||
|
|
||||||
|
extern void miaou();
|
||||||
|
|
||||||
|
|
||||||
|
//#include <fenv.h>
|
||||||
|
//#pragma STDC FENV_ACCESS ON
|
||||||
|
//int applyRounding(int rounding){
|
||||||
|
// int ret = fegetround( );
|
||||||
|
// switch(rounding){
|
||||||
|
// case 0: fesetround(FE_TONEAREST); break;
|
||||||
|
// case 1: fesetround(FE_TOWARDZERO); break;
|
||||||
|
// case 2: fesetround(FE_DOWNWARD); break;
|
||||||
|
// case 3: fesetround(FE_UPWARD); break;
|
||||||
|
// }
|
||||||
|
// return ret;
|
||||||
|
//}
|
||||||
|
// const int originalRounding = applyRounding(rounding);
|
||||||
|
// fesetround(originalRounding);
|
||||||
|
|
||||||
|
void applyRounding(int rounding){
|
||||||
|
switch(rounding){
|
||||||
|
case 0: softfloat_roundingMode = 0; break;
|
||||||
|
case 1: softfloat_roundingMode = 1; break;
|
||||||
|
case 2: softfloat_roundingMode = 2; break;
|
||||||
|
case 3: softfloat_roundingMode = 3; break;
|
||||||
|
case 4: softfloat_roundingMode = 4; break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#define API __attribute__((visibility("default")))
|
||||||
|
|
||||||
|
//float32_t toF32(float v){
|
||||||
|
// float32_t x;
|
||||||
|
// x.v = ;
|
||||||
|
// return x;
|
||||||
|
//}
|
||||||
|
|
||||||
|
#define toF32(v) (*((float32_t*)&v))
|
||||||
|
#define fromF32(x) (*((float*)&(x.v)))
|
||||||
|
|
||||||
|
// JNI entry point behind FpuMath.addF32: selects the requested rounding mode,
// adds the two single-precision operands with SoftFloat and returns the sum.
// `env` and `obj` are unused.
JNIEXPORT jfloat API JNICALL Java_vexriscv_ip_fpu_FpuMath_addF32(JNIEnv * env, jobject obj, jfloat a, jfloat b, jint rounding){
    float32_t lhs, rhs, sum;

    applyRounding(rounding);

    // Reinterpret the raw IEEE-754 bits of the Java floats as SoftFloat values.
    lhs = toF32(a);
    rhs = toF32(b);
    sum = f32_add(lhs, rhs);

    return fromF32(sum);
}
|
|
@ -0,0 +1,4 @@
|
||||||
|
/* Symbol version script for fpu_math.so.
 * Exports only the JNI entry points of vexriscv.ip.fpu.FpuMath and hides
 * everything else. The global pattern must match the full mangled JNI name
 * (Java_<package>_<class>_<method>); a bare "FpuMath_*" glob matches nothing
 * and, combined with "local: *", would hide the JNI functions.
 */
CODEABI_1.0 {
    global: Java_vexriscv_ip_fpu_FpuMath_*;
    local: *;
};
|
|
@ -0,0 +1,11 @@
|
||||||
|
package vexriscv.ip.fpu;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
|
||||||
|
/**
 * JNI binding to the native floating-point reference model.
 * The shared library is loaded once, when this class is initialised.
 */
public class FpuMath {
    static {
        // Path is relative to the working directory; System.load needs it absolute.
        final File library = new File("src/test/cpp/fpu/math/fpu_math.so");
        System.load(library.getAbsolutePath());
    }

    /**
     * Adds two single-precision floats in native code.
     *
     * @param a        first operand
     * @param b        second operand
     * @param rounding numeric rounding-mode selector forwarded to the native side
     * @return the sum computed by the native library
     */
    public native float addF32(float a, float b, int rounding);
}
|
|
@ -1,12 +1,16 @@
|
||||||
package vexriscv.ip.fpu
|
package vexriscv.ip.fpu
|
||||||
|
|
||||||
|
import java.io.File
|
||||||
import java.lang
|
import java.lang
|
||||||
|
|
||||||
|
import org.apache.commons.io.FileUtils
|
||||||
import org.scalatest.FunSuite
|
import org.scalatest.FunSuite
|
||||||
import spinal.core.SpinalEnumElement
|
import spinal.core.SpinalEnumElement
|
||||||
import spinal.core.sim._
|
import spinal.core.sim._
|
||||||
|
import spinal.lib.DoCmd
|
||||||
import spinal.lib.experimental.math.Floating
|
import spinal.lib.experimental.math.Floating
|
||||||
import spinal.lib.sim._
|
import spinal.lib.sim._
|
||||||
|
import spinal.sim.Backend.{isMac, isWindows}
|
||||||
|
|
||||||
import scala.collection.mutable
|
import scala.collection.mutable
|
||||||
import scala.collection.mutable.ArrayBuffer
|
import scala.collection.mutable.ArrayBuffer
|
||||||
|
@ -113,7 +117,7 @@ class FpuTest extends FunSuite{
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def add(rd : Int, rs1 : Int, rs2 : Int): Unit ={
|
def add(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E = FpuRoundMode.RNE): Unit ={
|
||||||
cmdQueue += {cmd =>
|
cmdQueue += {cmd =>
|
||||||
cmd.opcode #= cmd.opcode.spinalEnum.ADD
|
cmd.opcode #= cmd.opcode.spinalEnum.ADD
|
||||||
cmd.rs1 #= rs1
|
cmd.rs1 #= rs1
|
||||||
|
@ -121,6 +125,7 @@ class FpuTest extends FunSuite{
|
||||||
cmd.rs3.randomize()
|
cmd.rs3.randomize()
|
||||||
cmd.rd #= rd
|
cmd.rd #= rd
|
||||||
cmd.arg #= 0
|
cmd.arg #= 0
|
||||||
|
cmd.roundMode #= rounding
|
||||||
}
|
}
|
||||||
commitQueue += {cmd =>
|
commitQueue += {cmd =>
|
||||||
cmd.write #= true
|
cmd.write #= true
|
||||||
|
@ -318,20 +323,21 @@ class FpuTest extends FunSuite{
|
||||||
(Random.nextDouble() * (Math.pow(2.0, exp)) * (if(Random.nextBoolean()) -1.0 else 1.0)).toFloat
|
(Random.nextDouble() * (Math.pow(2.0, exp)) * (if(Random.nextBoolean()) -1.0 else 1.0)).toFloat
|
||||||
}
|
}
|
||||||
|
|
||||||
def testAdd(a : Float, b : Float): Unit ={
|
def testAdd(a : Float, b : Float, rounding : FpuRoundMode.E = FpuRoundMode.RNE): Unit ={
|
||||||
val rs = new RegAllocator()
|
val rs = new RegAllocator()
|
||||||
val rs1, rs2, rs3 = rs.allocate()
|
val rs1, rs2, rs3 = rs.allocate()
|
||||||
val rd = Random.nextInt(32)
|
val rd = Random.nextInt(32)
|
||||||
load(rs1, a)
|
load(rs1, a)
|
||||||
load(rs2, b)
|
load(rs2, b)
|
||||||
|
|
||||||
add(rd,rs1,rs2)
|
add(rd,rs1,rs2, rounding)
|
||||||
storeFloat(rd){v =>
|
storeFloat(rd){v =>
|
||||||
val a_ = clamp(a)
|
val a_ = clamp(a)
|
||||||
val b_ = clamp(b)
|
val b_ = clamp(b)
|
||||||
val ref = clamp(a_ + b_)
|
val ref = Clib.math.addF32(a,b, rounding.position)
|
||||||
println(f"$a + $b = $v, $ref")
|
println(f"${a}%.19f + $b%.19f = $v, $ref $rounding")
|
||||||
assert(checkFloat(ref, v))
|
println(f"${f2b(a).toHexString} + ${f2b(b).toHexString}")
|
||||||
|
assert(checkFloatExact(ref, v))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -547,6 +553,39 @@ class FpuTest extends FunSuite{
|
||||||
val iSigned = iSmall ++ iSmall.map(-_) ++ iBigSigned
|
val iSigned = iSmall ++ iSmall.map(-_) ++ iBigSigned
|
||||||
|
|
||||||
|
|
||||||
|
val roundingModes = FpuRoundMode.elements
|
||||||
|
/** Runs `body` once for each entry of `roundingModes`, in order. */
def foreachRounding(body : FpuRoundMode.E => Unit): Unit = roundingModes.foreach(body)
|
||||||
|
|
||||||
|
//TODO test and fix a - b rounding
|
||||||
|
foreachRounding(testAdd(1.0f, b2f(0x3f800001), _)) //1.00001
|
||||||
|
foreachRounding(testAdd(4.0f, b2f(0x3f800001), _)) //1.00001
|
||||||
|
for(_ <- 0 until 10000; a = randomFloat(); b = randomFloat()) foreachRounding(testAdd(a.abs, b.abs,_)) //TODO negative
|
||||||
|
|
||||||
|
|
||||||
|
waitUntil(cmdQueue.isEmpty)
|
||||||
|
dut.clockDomain.waitSampling(1000)
|
||||||
|
simSuccess()
|
||||||
|
|
||||||
|
testAdd(b2f(0x3f800000), b2f(0x3f800000-1))
|
||||||
|
testAdd(1.1f, 2.3f)
|
||||||
|
testAdd(1.2f, -1.2f)
|
||||||
|
testAdd(-1.2f, 1.2f)
|
||||||
|
testAdd(0.0f, -1.2f)
|
||||||
|
testAdd(-0.0f, -1.2f)
|
||||||
|
testAdd(1.2f, -0f)
|
||||||
|
testAdd(1.2f, 0f)
|
||||||
|
testAdd(1.1f, Float.MinPositiveValue)
|
||||||
|
|
||||||
|
for(a <- fAll; _ <- 0 until 50) testAdd(a, randomFloat())
|
||||||
|
for(b <- fAll; _ <- 0 until 50) testAdd(randomFloat(), b)
|
||||||
|
for(a <- fAll; b <- fAll) testAdd(a, b)
|
||||||
|
for(_ <- 0 until 1000) testAdd(randomFloat(), randomFloat())
|
||||||
|
|
||||||
|
|
||||||
testLoadStore(1.17549435082e-38f)
|
testLoadStore(1.17549435082e-38f)
|
||||||
testLoadStore(1.4E-45f)
|
testLoadStore(1.4E-45f)
|
||||||
testLoadStore(3.44383110592e-41f)
|
testLoadStore(3.44383110592e-41f)
|
||||||
|
@ -573,21 +612,6 @@ class FpuTest extends FunSuite{
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
testAdd(b2f(0x3f800000), b2f(0x3f800000-1))
|
|
||||||
testAdd(1.1f, 2.3f)
|
|
||||||
testAdd(1.2f, -1.2f)
|
|
||||||
testAdd(-1.2f, 1.2f)
|
|
||||||
testAdd(0.0f, -1.2f)
|
|
||||||
testAdd(-0.0f, -1.2f)
|
|
||||||
testAdd(1.2f, -0f)
|
|
||||||
testAdd(1.2f, 0f)
|
|
||||||
testAdd(1.1f, Float.MinPositiveValue)
|
|
||||||
|
|
||||||
for(a <- fAll; _ <- 0 until 50) testAdd(a, randomFloat())
|
|
||||||
for(b <- fAll; _ <- 0 until 50) testAdd(randomFloat(), b)
|
|
||||||
for(a <- fAll; b <- fAll) testAdd(a, b)
|
|
||||||
for(_ <- 0 until 1000) testAdd(randomFloat(), randomFloat())
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
testLoadStore(1.2f)
|
testLoadStore(1.2f)
|
||||||
|
@ -796,3 +820,24 @@ class FpuTest extends FunSuite{
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Compiles the native reference model (fpu_math.c) into fpu_math.so with gcc
 * and then instantiates [[FpuMath]], which loads that library.
 *
 * Everything runs as part of this object's initialisation, i.e. on first access
 * to any member (typically `Clib.math`).
 *
 * NOTE(review): the SoftFloat include and build paths are hard-coded to one
 * developer machine - consider making them configurable.
 */
object Clib {
  val java_home = System.getProperty("java.home")
  assert(java_home != "" && java_home != null, "JAVA_HOME need to be set")

  // JNI headers live in the JDK root, not in its bundled "jre" sub-directory.
  val jdk = java_home.replace("/jre","").replace("\\jre","")
  val jdkIncludes = jdk + "/include"
  val flags = List("-fPIC", "-m64", "-shared", "-Wno-attributes") //-Wl,--whole-archive

  // Object files of a local SoftFloat build. File.listFiles() returns null when
  // the directory does not exist, so wrap it to avoid a NullPointerException
  // during object initialisation on machines without that build tree.
  // This list is not part of `cmd` below; the link uses softfloat.a instead.
  val os = Option(new File("/media/data/open/SaxonSoc/berkeley-softfloat-3/build/Linux-x86_64-GCC").listFiles())
    .getOrElse(Array.empty[File])
    .map(_.getAbsolutePath)
    .filter(_.endsWith(".o"))

  val cmd = s"gcc -I/media/data/open/SaxonSoc/berkeley-softfloat-3/source/include -I$jdkIncludes -I$jdkIncludes/linux ${flags.mkString(" ")} -o src/test/cpp/fpu/math/fpu_math.so src/test/cpp/fpu/math/fpu_math.c src/test/cpp/fpu/math/softfloat.a" // src/test/cpp/fpu/math/softfloat.a
  DoCmd.doCmd(cmd)

  // Must come after the compilation: constructing FpuMath loads fpu_math.so.
  val math = new FpuMath
}
|
||||||
|
|
||||||
|
/**
 * Manual smoke test: triggers the native build through `Clib` and prints
 * 1.00000011921f + 4.0f under RNE, RTZ, RDN and RUP (one line per mode).
 */
object FpuCompileSo extends App{
  for(mode <- List(FpuRoundMode.RNE, FpuRoundMode.RTZ, FpuRoundMode.RDN, FpuRoundMode.RUP)){
    println(Clib.math.addF32(1.00000011921f, 4.0f, mode.position))
  }
}
|
||||||
|
|
Loading…
Reference in New Issue