FPU: add proper subnormal support; add/mul now pass

This commit is contained in:
Dolu1990 2021-01-26 10:49:53 +01:00
parent bdb5bc1180
commit f818fb3ba4
4 changed files with 304 additions and 125 deletions

View file

@ -227,13 +227,13 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val input = read.output.combStage() val input = read.output.combStage()
input.ready := False input.ready := False
val loadHit = input.opcode === p.Opcode.LOAD val loadHit = List(FpuOpcode.LOAD, FpuOpcode.FMV_W_X).map(input.opcode === _).orR
val load = Stream(LoadInput()) val load = Stream(LoadInput())
load.valid := input.valid && loadHit load.valid := input.valid && loadHit
input.ready setWhen(loadHit && load.ready) input.ready setWhen(loadHit && load.ready)
load.payload.assignSomeByName(read.output.payload) load.payload.assignSomeByName(read.output.payload)
val shortPipHit = List(FpuOpcode.STORE, FpuOpcode.F2I, FpuOpcode.CMP, FpuOpcode.I2F, FpuOpcode.MIN_MAX, FpuOpcode.SGNJ, FpuOpcode.FMV_X_W, FpuOpcode.FMV_W_X, FpuOpcode.FCLASS).map(input.opcode === _).orR val shortPipHit = List(FpuOpcode.STORE, FpuOpcode.F2I, FpuOpcode.CMP, FpuOpcode.I2F, FpuOpcode.MIN_MAX, FpuOpcode.SGNJ, FpuOpcode.FMV_X_W, FpuOpcode.FCLASS).map(input.opcode === _).orR
val shortPip = Stream(ShortPipInput()) val shortPip = Stream(ShortPipInput())
input.ready setWhen(shortPipHit && shortPip.ready) input.ready setWhen(shortPipHit && shortPip.ready)
shortPip.valid := input.valid && shortPipHit shortPip.valid := input.valid && shortPipHit
@ -283,43 +283,99 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
} }
val load = new Area{ val load = new Area{
val input = decode.load.stage()
val filtred = commitFork.load.map(port => port.takeWhen(port.load))
def feed = filtred(input.source)
val hazard = !feed.valid
val f32Mantissa = feed.value(0, 23 bits).asUInt case class S0() extends Bundle{
val f32Exponent = feed.value(23, 8 bits).asUInt val source = Source()
val f32Sign = feed.value(31) val lockId = lockIdType()
val rd = p.rfAddress()
val value = FpuFloat(exponentSize = p.internalExponentSize-1, mantissaSize = p.internalMantissaSize)
}
val expZero = f32Exponent === 0 val s0 = new Area{
val expOne = f32Exponent === 255 val input = decode.load.stage()
val manZero = f32Mantissa === 0 val filtred = commitFork.load.map(port => port.takeWhen(port.sync))
def feed = filtred(input.source)
val hazard = !feed.valid
val isZero = expZero && manZero val output = input.haltWhen(hazard).swapPayload(S0())
val isSubnormal = expZero && !manZero filtred.foreach(_.ready := False)
val isNormal = !expOne && !expZero feed.ready := input.valid && output.ready
val isInfinity = expOne && manZero output.source := input.source
val isNan = expOne && !manZero output.lockId := input.lockId
val isQuiet = f32Mantissa.msb output.rd := input.rd
output.value.mantissa := feed.value(0, 23 bits).asUInt
output.value.exponent := feed.value(23, 8 bits).asUInt
output.value.sign := feed.value(31)
}
val recoded = p.internalFloating() val s1 = new Area{
recoded.mantissa := f32Mantissa val input = s0.output.stage()
recoded.exponent := f32Exponent val busy = False
recoded.sign := f32Sign
recoded.setNormal
when(isZero){recoded.setZero}
when(isSubnormal){recoded.setSubnormal}
when(isInfinity){recoded.setInfinity}
when(isNan){recoded.setNan}
val output = input.haltWhen(hazard).swapPayload(WriteInput()) val f32Mantissa = input.value.mantissa
filtred.foreach(_.ready := False) val f32Exponent = input.value.exponent
feed.ready := input.valid && output.ready val f32Sign = input.value.sign
output.source := input.source
output.lockId := input.lockId val expZero = f32Exponent === 0
output.rd := input.rd val expOne = f32Exponent === 255
output.value := recoded val manZeroReject = Reg(Bool()) setWhen(busy) clearWhen(!input.isStall)
val manZero = f32Mantissa === 0 && !manZeroReject
val isZero = expZero && manZero
val isSubnormal = expZero && !manZero
val isNormal = !expOne && !expZero
val isInfinity = expOne && manZero
val isNan = expOne && !manZero
val isQuiet = f32Mantissa.msb
// Multi-cycle normaliser for subnormal values arriving in this load stage.
// While it runs, `busy` is raised, which stalls the stage output (the stage
// output is built with haltWhen(busy)).
val subnormal = new Area{
// Shift amount captured on the first (boot) cycle.
val manTop = Reg(UInt(log2Up(p.internalMantissaSize) bits))
// Effective shift target: manTop for subnormal inputs, 0 otherwise.
val shift = isSubnormal ? manTop | U(0)
// Counts the shift iterations already performed.
val counter = Reg(UInt(log2Up(p.internalMantissaSize+1) bits))
// done : normalisation finished for the current input.
// boot : next active cycle is the capture cycle.
// NOTE(review): neither register has an init value; they are only put in a
// known state by the `!input.isStall` branch at the bottom — confirm this is
// guaranteed to happen before the first subnormal input.
val done, boot = Reg(Bool())
when(isSubnormal && !done){
busy := True
when(boot){
// Index of the first set bit of the bit-reversed mantissa
// (presumably the number of leading zeros of the mantissa).
manTop := OHToUInt(OHMasking.first((f32Mantissa).reversed))
boot := False
} otherwise {
// Shift the mantissa held in the register driving this stage left by one
// bit per cycle (|<< keeps the width).
input.value.mantissa.getDrivingReg := input.value.mantissa |<< 1
counter := counter + 1
// The cycle on which counter === shift still shifts, so manTop + 1 shifts
// are applied in total before `done` is set.
when(counter === shift) {
done := True
}
}
}
// Exponent correction consumed when computing recoded.exponent:
// manTop for subnormal inputs, 0 otherwise.
val expOffset = (UInt(p.internalExponentSize bits))
expOffset := 0
when(isSubnormal){
expOffset := manTop.resized
}
// Re-arm the state machine whenever the stage is not stalled. Being the last
// assignments in the block, these take priority over the ones above.
when(!input.isStall){
counter := 0
done := False
boot := True
}
}
val recoded = p.internalFloating()
recoded.mantissa := f32Mantissa
recoded.exponent := (f32Exponent -^ subnormal.expOffset + (exponentOne - 127)).resized
recoded.sign := f32Sign
recoded.setNormal
when(isZero){recoded.setZero}
//when(isSubnormal){recoded.setSubnormal}
when(isInfinity){recoded.setInfinity}
when(isNan){recoded.setNan}
val output = input.haltWhen(busy).swapPayload(WriteInput())
output.source := input.source
output.lockId := input.lockId
output.rd := input.rd
output.value := recoded
}
} }
val shortPip = new Area{ val shortPip = new Area{
@ -330,23 +386,62 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val result = p.storeLoadType().assignDontCare() val result = p.storeLoadType().assignDontCare()
val recoded = CombInit(input.rs1) val recoded = CombInit(input.rs1)
val halt = False
val recodedResult = Bits(32 bits)//recoded.asBits.resize(32 bits)
val f32 = new Area{
val exp = (recoded.exponent - (exponentOne-127)).resize(8 bits)
val man = recoded.mantissa
}
recodedResult := recoded.sign ## f32.exp ## f32.man
// Multi-cycle denormaliser used when a value must be emitted in IEEE-754 f32
// form. While it runs, `halt` is raised, which holds back both the register
// file output and the port response of this pipeline.
val subnormal = new Area{
// Non-special value whose internal exponent is at or below exponentOne - 127,
// i.e. the f32 exponent field (exponent - (exponentOne-127)) would be <= 0.
val isSubnormal = !recoded.special && recoded.exponent <= exponentOne - 127
// Shift amount captured on the first (boot) cycle.
val manTop = Reg(UInt(log2Up(p.internalMantissaSize) bits))
// Effective shift target: manTop for subnormal values, 0 otherwise.
val shift = isSubnormal ? manTop | U(0)
// Counts the shift iterations already performed.
val counter = Reg(UInt(log2Up(p.internalMantissaSize+1) bits))
// done : denormalisation finished for the current input.
// boot : next active cycle is the capture cycle.
// NOTE(review): no init value; state is only defined after the
// `!input.isStall` branch below has executed once — confirm.
val done, boot = Reg(Bool())
when(isSubnormal && !done){
halt := True
when(boot){
// Distance between the subnormal threshold and the actual exponent.
manTop := (U(exponentOne - 127) - recoded.exponent).resized
boot := False
} otherwise {
// Shift the mantissa right by one bit per cycle, written back through
// getDrivingReg (presumably the stage register behind input.rs1).
// The bit shifted in at the top is 1 only on the first shift
// (counter === 0) and 0 afterwards.
recoded.mantissa.getDrivingReg := (U(counter === 0) @@ recoded.mantissa) >> 1
counter := counter + 1
// The cycle on which counter === shift still shifts, so manTop + 1 shifts
// are applied in total before `done` is set.
when(counter === shift) {
done := True
}
}
}
// Subnormal results always carry an all-zero f32 exponent field.
when(isSubnormal){
f32.exp := 0
}
// Re-arm the state machine whenever the stage is not stalled. Being the last
// assignments in the block, these take priority over the ones above.
when(!input.isStall){
counter := 0
done := False
boot := True
}
}
when(recoded.special){ when(recoded.special){
switch(input.rs1.exponent(1 downto 0)){ switch(input.rs1.exponent(1 downto 0)){
is(FpuFloat.ZERO){ is(FpuFloat.ZERO){
recoded.mantissa.clearAll() recodedResult(0,23 bits).clearAll()
recoded.exponent.clearAll() recodedResult(23, 8 bits).clearAll()
} }
is(FpuFloat.INFINITY){ is(FpuFloat.INFINITY){
recoded.mantissa.clearAll() recodedResult(0, 23 bits).clearAll()
recoded.exponent.setAll() recodedResult(23, 8 bits).setAll()
} }
is(FpuFloat.NAN){ is(FpuFloat.NAN){
recoded.exponent.setAll() recodedResult(23, 8 bits).setAll()
} }
} }
} }
val recodedResult = recoded.asBits.resize(32 bits)
val f2iShift = input.rs1.exponent - U(exponentOne) val f2iShift = input.rs1.exponent - U(exponentOne)
val f2iShifted = (U"1" @@ input.rs1.mantissa) << (f2iShift.resize(5 bits)) val f2iShifted = (U"1" @@ input.rs1.mantissa) << (f2iShift.resize(5 bits))
@ -367,32 +462,33 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
3 -> (!rs1AbsSmaller && !rs1Equal) 3 -> (!rs1AbsSmaller && !rs1Equal)
) )
val rawToFpu = new Area{ //TODO
val f32Mantissa = input.value(0, 23 bits).asUInt // val rawToFpu = new Area{
val f32Exponent = input.value(23, 8 bits).asUInt // val f32Mantissa = input.value(0, 23 bits).asUInt
val f32Sign = input.value(31) // val f32Exponent = input.value(23, 8 bits).asUInt
// val f32Sign = input.value(31)
val expZero = f32Exponent === 0 //
val expOne = f32Exponent === 255 // val expZero = f32Exponent === 0
val manZero = f32Mantissa === 0 // val expOne = f32Exponent === 255
// val manZero = f32Mantissa === 0
val isZero = expZero && manZero //
val isSubnormal = expZero && !manZero // val isZero = expZero && manZero
val isNormal = !expOne && !expZero // val isSubnormal = expZero && !manZero
val isInfinity = expOne && manZero // val isNormal = !expOne && !expZero
val isNan = expOne && !manZero // val isInfinity = expOne && manZero
val isQuiet = f32Mantissa.msb // val isNan = expOne && !manZero
// val isQuiet = f32Mantissa.msb
val recoded = p.internalFloating() //
recoded.mantissa := f32Mantissa // val recoded = p.internalFloating()
recoded.exponent := f32Exponent // recoded.mantissa := f32Mantissa
recoded.sign := f32Sign // recoded.exponent := f32Exponent
recoded.setNormal // recoded.sign := f32Sign
when(isZero){recoded.setZero} // recoded.setNormal
when(isSubnormal){recoded.setSubnormal} // when(isZero){recoded.setZero}
when(isInfinity){recoded.setInfinity} // when(isSubnormal){recoded.setSubnormal}
when(isNan){recoded.setNan} // when(isInfinity){recoded.setInfinity}
} // when(isNan){recoded.setNan}
// }
val minMaxResult = (rs1Smaller ^ input.arg(0)) ? input.rs1 | input.rs2 val minMaxResult = (rs1Smaller ^ input.arg(0)) ? input.rs1 | input.rs2
val cmpResult = B(rs1Smaller && !input.arg(1) || rs1Equal && !input.arg(0)) val cmpResult = B(rs1Smaller && !input.arg(1) || rs1Equal && !input.arg(0))
@ -401,10 +497,10 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val decoded = input.rs1.decode() val decoded = input.rs1.decode()
fclassResult(0) := input.rs1.sign && decoded.isInfinity fclassResult(0) := input.rs1.sign && decoded.isInfinity
fclassResult(1) := input.rs1.sign && decoded.isNormal fclassResult(1) := input.rs1.sign && decoded.isNormal
fclassResult(2) := input.rs1.sign && decoded.isSubnormal // fclassResult(2) := input.rs1.sign && decoded.isSubnormal //TODO
fclassResult(3) := input.rs1.sign && decoded.isZero fclassResult(3) := input.rs1.sign && decoded.isZero
fclassResult(4) := !input.rs1.sign && decoded.isZero fclassResult(4) := !input.rs1.sign && decoded.isZero
fclassResult(5) := !input.rs1.sign && decoded.isSubnormal // fclassResult(5) := !input.rs1.sign && decoded.isSubnormal //TODO
fclassResult(6) := !input.rs1.sign && decoded.isNormal fclassResult(6) := !input.rs1.sign && decoded.isNormal
fclassResult(7) := !input.rs1.sign && decoded.isInfinity fclassResult(7) := !input.rs1.sign && decoded.isInfinity
fclassResult(8) := decoded.isNan && !decoded.isQuiet fclassResult(8) := decoded.isNan && !decoded.isQuiet
@ -419,9 +515,9 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
is(FpuOpcode.FCLASS) { result := fclassResult.resized } is(FpuOpcode.FCLASS) { result := fclassResult.resized }
} }
val toFpuRf = List(FpuOpcode.MIN_MAX, FpuOpcode.I2F, FpuOpcode.SGNJ, FpuOpcode.FMV_W_X).map(input.opcode === _).orR val toFpuRf = List(FpuOpcode.MIN_MAX, FpuOpcode.I2F, FpuOpcode.SGNJ).map(input.opcode === _).orR
rfOutput.valid := input.valid && toFpuRf rfOutput.valid := input.valid && toFpuRf && !halt
rfOutput.source := input.source rfOutput.source := input.source
rfOutput.lockId := input.lockId rfOutput.lockId := input.lockId
rfOutput.rd := input.rd rfOutput.rd := input.rd
@ -442,15 +538,12 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
rfOutput.value.mantissa := input.rs1.mantissa rfOutput.value.mantissa := input.rs1.mantissa
rfOutput.value.special := False //TODO rfOutput.value.special := False //TODO
} }
is(FpuOpcode.FMV_W_X){
rfOutput.value := rawToFpu.recoded
}
} }
input.ready := (toFpuRf ? rfOutput.ready | io.port.map(_.rsp.ready).read(input.source)) input.ready := !halt && (toFpuRf ? rfOutput.ready | io.port.map(_.rsp.ready).read(input.source))
for(i <- 0 until portCount){ for(i <- 0 until portCount){
def rsp = io.port(i).rsp def rsp = io.port(i).rsp
rsp.valid := input.valid && input.source === i && !toFpuRf rsp.valid := input.valid && input.source === i && !toFpuRf && !halt
rsp.value := result rsp.value := result
completion(i).increments += (RegNext(rsp.fire) init(False)) completion(i).increments += (RegNext(rsp.fire) init(False))
} }
@ -463,7 +556,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val mulA = U(input.msb1) @@ input.rs1.mantissa val mulA = U(input.msb1) @@ input.rs1.mantissa
val mulB = U(input.msb2) @@ input.rs2.mantissa val mulB = U(input.msb2) @@ input.rs2.mantissa
val mulC = mulA * mulB val mulC = mulA * mulB
val expOffset = ((1 << p.internalExponentSize - 1) - 1)
val exp = input.rs1.exponent +^ input.rs2.exponent val exp = input.rs1.exponent +^ input.rs2.exponent
} }
@ -478,13 +570,13 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val man = needShift ? mulRounded(1, p.internalMantissaSize bits) | mulRounded(0, p.internalMantissaSize bits) val man = needShift ? mulRounded(1, p.internalMantissaSize bits) | mulRounded(0, p.internalMantissaSize bits)
val forceZero = input.rs1.isZeroOrSubnormal || input.rs2.isZeroOrSubnormal val forceZero = input.rs1.isZeroOrSubnormal || input.rs2.isZeroOrSubnormal
val forceUnderflow = exp <= math.expOffset val forceUnderflow = exp <= exponentOne + exponentOne - 127 - 23 // 0x6A //TODO
val forceOverflow = exp > math.expOffset+254 || input.rs1.isInfinity || input.rs2.isInfinity val forceOverflow = exp > exponentOne + exponentOne + 127 || input.rs1.isInfinity || input.rs2.isInfinity
val forceNan = input.rs1.isNan || input.rs2.isNan || ((input.rs1.isInfinity || input.rs2.isInfinity) && (input.rs1.isZero || input.rs2.isZero)) val forceNan = input.rs1.isNan || input.rs2.isNan || ((input.rs1.isInfinity || input.rs2.isInfinity) && (input.rs1.isZero || input.rs2.isZero))
val output = FpuFloat(p.internalExponentSize, p.internalMantissaSize) val output = FpuFloat(p.internalExponentSize, p.internalMantissaSize)
output.sign := input.rs1.sign ^ input.rs2.sign output.sign := input.rs1.sign ^ input.rs2.sign
output.exponent := (exp - math.expOffset).resized output.exponent := (exp - exponentOne).resized
output.mantissa := man output.mantissa := man
output.setNormal output.setNormal
@ -702,7 +794,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val shifter = new Area { val shifter = new Area {
val exp21 = input.rs2.exponent -^ input.rs1.exponent val exp21 = input.rs2.exponent -^ input.rs1.exponent
val rs1ExponentBigger = exp21.msb || input.rs2.isZeroOrSubnormal val rs1ExponentBigger = (exp21.msb || input.rs2.isZeroOrSubnormal) && !input.rs1.isZeroOrSubnormal
val rs1ExponentEqual = input.rs1.exponent === input.rs2.exponent val rs1ExponentEqual = input.rs1.exponent === input.rs2.exponent
val rs1MantissaBigger = input.rs1.mantissa > input.rs2.mantissa val rs1MantissaBigger = input.rs1.mantissa > input.rs2.mantissa
val absRs1Bigger = ((rs1ExponentBigger || rs1ExponentEqual && rs1MantissaBigger) && !input.rs1.isZeroOrSubnormal || input.rs1.isInfinity) && !input.rs2.isInfinity val absRs1Bigger = ((rs1ExponentBigger || rs1ExponentEqual && rs1MantissaBigger) && !input.rs1.isZeroOrSubnormal || input.rs1.isInfinity) && !input.rs2.isInfinity
@ -746,7 +838,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
val exponent = xyExponent -^ shift + 1 val exponent = xyExponent -^ shift + 1
xySign clearWhen(input.rs1.isZeroOrSubnormal && input.rs2.isZeroOrSubnormal) xySign clearWhen(input.rs1.isZeroOrSubnormal && input.rs2.isZeroOrSubnormal)
val forceZero = xyMantissa === 0 || exponent.msb || (input.rs1.isZeroOrSubnormal && input.rs2.isZeroOrSubnormal) val forceZero = xyMantissa === 0 || exponent.msb || (input.rs1.isZeroOrSubnormal && input.rs2.isZeroOrSubnormal)
val forceOverflow = exponent(7 downto 0) === 255 || (input.rs1.isInfinity || input.rs2.isInfinity) val forceOverflow = exponent === exponentOne + 128 || (input.rs1.isInfinity || input.rs2.isInfinity)
val forceNan = input.rs1.isNan || input.rs2.isNan || (input.rs1.isInfinity && input.rs2.isInfinity && (input.rs1.sign ^ input.rs2.sign)) val forceNan = input.rs1.isNan || input.rs2.isNan || (input.rs1.isInfinity && input.rs2.isInfinity && (input.rs1.sign ^ input.rs2.sign))
} }
@ -773,8 +865,28 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
} }
// val format = new Area{
// val input = pipeArbiter.arbitrated.combStage()
//
// val rotate = new Area{
// val input = Bits(p.internalMantissaSize bits)
// val shift = UInt(log2Up(p.internalMantissaSize) bits)
// val output = input.rotateLeft(shift)
// }
//
// val decode = new Area{
// val sign = input.raw(31)
// val exp = input.raw(23, 8 bits).asUInt
// val man = input.raw(23, 8 bits).asUInt
// val isSubnormal = exp === 0 //zero ?
// val manTop = OHToUInt(OHMasking.first((man ## U"1").reversed))
// val shift = isSubnormal ? manTop | U(0)
// rotate.shift := shift
// }
// }
val write = new Area{ val write = new Area{
val arbitrated = StreamArbiterFactory.lowerFirst.noLock.on(List(load.output, add.output, mul.output, shortPip.rfOutput)) val arbitrated = StreamArbiterFactory.lowerFirst.noLock.on(List(load.s1.output, add.output, mul.output, shortPip.rfOutput))
val isCommited = rf.lock.map(_.commited).read(arbitrated.lockId) val isCommited = rf.lock.map(_.commited).read(arbitrated.lockId)
val commited = arbitrated.haltWhen(!isCommited).toFlow val commited = arbitrated.haltWhen(!isCommited).toFlow
@ -794,8 +906,8 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
port.data := commited.value port.data := commited.value
when(port.valid){ when(port.valid){
assert(!(port.data.exponent === 0 && !port.data.special)) assert(!(port.data.exponent === 0 && !port.data.special), "Special violation")
assert(!(port.data.exponent === port.data.exponent.maxValue && !port.data.special)) assert(!(port.data.exponent === port.data.exponent.maxValue && !port.data.special), "Special violation")
} }
} }
} }
@ -831,11 +943,47 @@ object FpuSynthesisBench extends App{
SpinalVerilog(new Component{ SpinalVerilog(new Component{
val a = in UInt(width bits) val a = in UInt(width bits)
val sel = in UInt(log2Up(width) bits) val sel = in UInt(log2Up(width) bits)
val result = out(a.rotateLeft(sel)) val result = out(Delay(Delay(a,3).rotateLeft(Delay(sel,3)),3))
setDefinitionName(Rotate.this.getName()) setDefinitionName(Rotate.this.getName())
}) })
} }
// rotate2_24 ->
// Artix 7 -> 233 Mhz 96 LUT 167 FF
// Artix 7 -> 420 Mhz 86 LUT 229 FF
// rotate2_32 ->
// Artix 7 -> 222 Mhz 108 LUT 238 FF
// Artix 7 -> 399 Mhz 110 LUT 300 FF
// rotate2_52 ->
// Artix 7 -> 195 Mhz 230 LUT 362 FF
// Artix 7 -> 366 Mhz 225 LUT 486 FF
// rotate2_64 ->
// Artix 7 -> 182 Mhz 257 LUT 465 FF
// Artix 7 -> 359 Mhz 266 LUT 591 FF
/** Synthesis bench: rotate-left over a value zero-extended to twice `width`,
  * with 3 Delay stages on each input and 3 on the output.
  */
class Rotate2(width : Int) extends Rtl{
  override def getName(): String = s"rotate2_$width"
  override def getRtlPath(): String = s"${getName()}.v"

  SpinalVerilog(new Component{
    // Port names are kept as-is: they become the generated RTL port names.
    val a = in UInt(width bits)
    val sel = in UInt(log2Up(width) bits)
    val result = out(
      Delay(
        (U(0, width bits) @@ Delay(a,3)).rotateLeft(Delay(sel,3)),
        3
      )
    )
    setDefinitionName(Rotate2.this.getName())
  })
}
/** Synthesis bench stub: registers both inputs through 3 Delay stages.
  *
  * NOTE(review): the result/output lines are still commented out, so this
  * component declares no output — confirm whether that is intended before
  * relying on its synthesis numbers.
  */
class Rotate3(width : Int) extends Rtl{
  override def getName(): String = s"rotate3_$width"
  override def getRtlPath(): String = s"${getName()}.v"

  SpinalVerilog(new Component{
    val a = Delay(in UInt(width bits), 3)
    val sel = Delay(in UInt(log2Up(width) bits),3)
    // val result =
    // val output = Delay(result, 3)
    setDefinitionName(Rotate3.this.getName())
  })
}
val rtls = ArrayBuffer[Rtl]() val rtls = ArrayBuffer[Rtl]()
// rtls += new Fpu( // rtls += new Fpu(
// "32", // "32",
@ -858,10 +1006,14 @@ object FpuSynthesisBench extends App{
// rtls += new Shifter(32) // rtls += new Shifter(32)
// rtls += new Shifter(52) // rtls += new Shifter(52)
// rtls += new Shifter(64) // rtls += new Shifter(64)
rtls += new Rotate(24) // rtls += new Rotate(24)
rtls += new Rotate(32) // rtls += new Rotate(32)
rtls += new Rotate(52) // rtls += new Rotate(52)
rtls += new Rotate(64) // rtls += new Rotate(64)
rtls += new Rotate3(24)
rtls += new Rotate3(32)
rtls += new Rotate3(52)
rtls += new Rotate3(64)
val targets = XilinxStdTargets()// ++ AlteraStdTargets() val targets = XilinxStdTargets()// ++ AlteraStdTargets()

View file

@ -50,14 +50,14 @@ case class FpuFloat(exponentSize: Int,
def isNormal = !special def isNormal = !special
def isZero = special && exponent(1 downto 0) === 0 def isZero = special && exponent(1 downto 0) === 0
def isSubnormal = special && exponent(1 downto 0) === 1 //def isSubnormal = special && exponent(1 downto 0) === 1
def isInfinity = special && exponent(1 downto 0) === 2 def isInfinity = special && exponent(1 downto 0) === 2
def isNan = special && exponent(1 downto 0) === 3 def isNan = special && exponent(1 downto 0) === 3
def isQuiet = mantissa.msb def isQuiet = mantissa.msb
def setNormal = { special := False } def setNormal = { special := False }
def setZero = { special := True; exponent(1 downto 0) := 0 } def setZero = { special := True; exponent(1 downto 0) := 0 }
def setSubnormal = { special := True; exponent(1 downto 0) := 1 } //def setSubnormal = { special := True; exponent(1 downto 0) := 1 }
def setInfinity = { special := True; exponent(1 downto 0) := 2 } def setInfinity = { special := True; exponent(1 downto 0) := 2 }
def setNan = { special := True; exponent(1 downto 0) := 3 } def setNan = { special := True; exponent(1 downto 0) := 3 }
def setNanQuiet = { special := True; exponent(1 downto 0) := 3; mantissa.msb := True } def setNanQuiet = { special := True; exponent(1 downto 0) := 3; mantissa.msb := True }
@ -65,7 +65,7 @@ case class FpuFloat(exponentSize: Int,
def decode() = { def decode() = {
val ret = FpuFloatDecoded() val ret = FpuFloatDecoded()
ret.isZero := isZero ret.isZero := isZero
ret.isSubnormal := isSubnormal //ret.isSubnormal := isSubnormal
ret.isNormal := isNormal ret.isNormal := isNormal
ret.isInfinity := isInfinity ret.isInfinity := isInfinity
ret.isNan := isNan ret.isNan := isNan
@ -101,7 +101,7 @@ case class FpuParameter( internalMantissaSize : Int,
withDouble : Boolean){ withDouble : Boolean){
val storeLoadType = HardType(Bits(if(withDouble) 64 bits else 32 bits)) val storeLoadType = HardType(Bits(if(withDouble) 64 bits else 32 bits))
val internalExponentSize = if(withDouble) 11 else 8 val internalExponentSize = (if(withDouble) 11 else 8) + 1
val internalFloating = HardType(FpuFloat(exponentSize = internalExponentSize, mantissaSize = internalMantissaSize)) val internalFloating = HardType(FpuFloat(exponentSize = internalExponentSize, mantissaSize = internalMantissaSize))
val rfAddress = HardType(UInt(5 bits)) val rfAddress = HardType(UInt(5 bits))
@ -132,7 +132,7 @@ case class FpuCmd(p : FpuParameter) extends Bundle{
case class FpuCommit(p : FpuParameter) extends Bundle{ case class FpuCommit(p : FpuParameter) extends Bundle{
val write = Bool() val write = Bool()
val load = Bool() val sync = Bool()
val value = p.storeLoadType() // IEEE 754 val value = p.storeLoadType() // IEEE 754
} }

View file

@ -11,7 +11,8 @@ class FpuPlugin(externalFpu : Boolean = false,
object FPU_ENABLE extends Stageable(Bool()) object FPU_ENABLE extends Stageable(Bool())
object FPU_COMMIT extends Stageable(Bool()) object FPU_COMMIT extends Stageable(Bool())
object FPU_LOAD extends Stageable(Bool()) object FPU_COMMIT_SYNC extends Stageable(Bool())
object FPU_COMMIT_LOAD extends Stageable(Bool())
object FPU_RSP extends Stageable(Bool()) object FPU_RSP extends Stageable(Bool())
object FPU_FORKED extends Stageable(Bool()) object FPU_FORKED extends Stageable(Bool())
object FPU_OPCODE extends Stageable(FpuOpcode()) object FPU_OPCODE extends Stageable(FpuOpcode())
@ -28,7 +29,6 @@ class FpuPlugin(externalFpu : Boolean = false,
FPU_ENABLE -> True, FPU_ENABLE -> True,
FPU_COMMIT -> False, FPU_COMMIT -> False,
FPU_RSP -> True, FPU_RSP -> True,
FPU_LOAD -> False,
REGFILE_WRITE_VALID -> True, REGFILE_WRITE_VALID -> True,
BYPASSABLE_EXECUTE_STAGE -> False, BYPASSABLE_EXECUTE_STAGE -> False,
BYPASSABLE_MEMORY_STAGE -> False BYPASSABLE_MEMORY_STAGE -> False
@ -37,8 +37,7 @@ class FpuPlugin(externalFpu : Boolean = false,
val floatRfWrite = List[ENC]( val floatRfWrite = List[ENC](
FPU_ENABLE -> True, FPU_ENABLE -> True,
FPU_COMMIT -> True, FPU_COMMIT -> True,
FPU_RSP -> False, FPU_RSP -> False
FPU_LOAD -> False
) )
val addSub = floatRfWrite :+ FPU_OPCODE -> FpuOpcode.ADD val addSub = floatRfWrite :+ FPU_OPCODE -> FpuOpcode.ADD
@ -60,7 +59,6 @@ class FpuPlugin(externalFpu : Boolean = false,
FPU_ENABLE -> True, FPU_ENABLE -> True,
FPU_OPCODE -> FpuOpcode.LOAD, FPU_OPCODE -> FpuOpcode.LOAD,
FPU_COMMIT -> True, FPU_COMMIT -> True,
FPU_LOAD -> True,
FPU_RSP -> False FPU_RSP -> False
) )
@ -68,7 +66,6 @@ class FpuPlugin(externalFpu : Boolean = false,
FPU_ENABLE -> True, FPU_ENABLE -> True,
FPU_OPCODE -> FpuOpcode.STORE, FPU_OPCODE -> FpuOpcode.STORE,
FPU_COMMIT -> False, FPU_COMMIT -> False,
FPU_LOAD -> False,
FPU_RSP -> True FPU_RSP -> True
) )
@ -164,7 +161,7 @@ class FpuPlugin(externalFpu : Boolean = false,
//Maybe it might be better to not fork before fire to avoid RF stall on commits //Maybe it might be better to not fork before fire to avoid RF stall on commits
val forked = Reg(Bool) setWhen(port.cmd.fire) clearWhen(!arbitration.isStuck) init(False) val forked = Reg(Bool) setWhen(port.cmd.fire) clearWhen(!arbitration.isStuck) init(False)
val intRfReady = Reg(Bool()) setWhen(!arbitration.isStuckByOthers) clearWhen(!arbitration.isStuck) val intRfReady = Reg(Bool()) setWhen(!arbitration.isStuckByOthers) clearWhen(!arbitration.isStuck) //TODO is that still in use ?
val hazard = (input(RS1_USE) && !intRfReady) || csr.pendings.msb || csr.csrActive val hazard = (input(RS1_USE) && !intRfReady) || csr.pendings.msb || csr.csrActive
arbitration.haltItself setWhen(arbitration.isValid && input(FPU_ENABLE) && hazard) arbitration.haltItself setWhen(arbitration.isValid && input(FPU_ENABLE) && hazard)
@ -181,6 +178,9 @@ class FpuPlugin(externalFpu : Boolean = false,
port.cmd.format := FpuFormat.FLOAT port.cmd.format := FpuFormat.FLOAT
insert(FPU_FORKED) := forked || port.cmd.fire insert(FPU_FORKED) := forked || port.cmd.fire
insert(FPU_COMMIT_SYNC) := List(FpuOpcode.LOAD, FpuOpcode.FMV_W_X).map(_ === input(FPU_OPCODE)).orR
insert(FPU_COMMIT_LOAD) := input(FPU_OPCODE) === FpuOpcode.LOAD
} }
writeBack plug new Area{ writeBack plug new Area{
@ -206,9 +206,9 @@ class FpuPlugin(externalFpu : Boolean = false,
// Manage $load // Manage $load
val commit = Stream(FpuCommit(p)) val commit = Stream(FpuCommit(p))
commit.valid := isCommit && arbitration.isMoving commit.valid := isCommit && arbitration.isMoving
commit.value.assignFromBits(output(DBUS_DATA)) commit.value := (input(FPU_COMMIT_LOAD) ? output(DBUS_DATA) | input(RS1))
commit.write := arbitration.isValid commit.write := arbitration.isValid
commit.load := input(FPU_LOAD) commit.sync := input(FPU_COMMIT_SYNC)
when(arbitration.isValid && !commit.ready){ when(arbitration.isValid && !commit.ready){
arbitration.haltByOther := True arbitration.haltByOther := True

View file

@ -17,7 +17,7 @@ class FpuTest extends FunSuite{
val b2f = lang.Float.intBitsToFloat(_) val b2f = lang.Float.intBitsToFloat(_)
val f2b = lang.Float.floatToIntBits(_) val f2b = lang.Float.floatToIntBits(_)
def clamp(f : Float) = { def clamp(f : Float) = {
if(f.abs < b2f(0x00800000)) b2f(f2b(f) & 0x80000000) else f f // if(f.abs < b2f(0x00800000)) b2f(f2b(f) & 0x80000000) else f
} }
test("directed"){ test("directed"){
@ -74,7 +74,7 @@ class FpuTest extends FunSuite{
commitQueue += {cmd => commitQueue += {cmd =>
cmd.write #= true cmd.write #= true
cmd.value #= value cmd.value #= value
cmd.load #= true cmd.sync #= true
} }
} }
@ -112,7 +112,7 @@ class FpuTest extends FunSuite{
} }
commitQueue += {cmd => commitQueue += {cmd =>
cmd.write #= true cmd.write #= true
cmd.load #= false cmd.sync #= false
} }
} }
@ -128,7 +128,7 @@ class FpuTest extends FunSuite{
} }
commitQueue += {cmd => commitQueue += {cmd =>
cmd.write #= true cmd.write #= true
cmd.load #= false cmd.sync #= false
} }
} }
@ -144,7 +144,7 @@ class FpuTest extends FunSuite{
} }
commitQueue += {cmd => commitQueue += {cmd =>
cmd.write #= true cmd.write #= true
cmd.load #= false cmd.sync #= false
} }
} }
@ -160,7 +160,7 @@ class FpuTest extends FunSuite{
} }
commitQueue += {cmd => commitQueue += {cmd =>
cmd.write #= true cmd.write #= true
cmd.load #= false cmd.sync #= false
} }
} }
@ -176,7 +176,7 @@ class FpuTest extends FunSuite{
} }
commitQueue += {cmd => commitQueue += {cmd =>
cmd.write #= true cmd.write #= true
cmd.load #= false cmd.sync #= false
} }
} }
@ -219,7 +219,7 @@ class FpuTest extends FunSuite{
} }
commitQueue += {cmd => commitQueue += {cmd =>
cmd.write #= true cmd.write #= true
cmd.load #= false cmd.sync #= false
} }
} }
@ -239,7 +239,7 @@ class FpuTest extends FunSuite{
def fmv_w_x(rd : Int, value : Int): Unit ={ def fmv_w_x(rd : Int, value : Int): Unit ={
cmdQueue += {cmd => cmdQueue += {cmd =>
cmd.opcode #= cmd.opcode.spinalEnum.FMV_W_X cmd.opcode #= cmd.opcode.spinalEnum.FMV_W_X
cmd.value #= value.toLong & 0xFFFFFFFFl cmd.value.randomize()
cmd.rs1.randomize() cmd.rs1.randomize()
cmd.rs2.randomize() cmd.rs2.randomize()
cmd.rs3.randomize() cmd.rs3.randomize()
@ -248,7 +248,8 @@ class FpuTest extends FunSuite{
} }
commitQueue += {cmd => commitQueue += {cmd =>
cmd.write #= true cmd.write #= true
cmd.load #= false cmd.sync #= true
cmd.value #= value.toLong & 0xFFFFFFFFl
} }
} }
@ -264,7 +265,7 @@ class FpuTest extends FunSuite{
} }
commitQueue += {cmd => commitQueue += {cmd =>
cmd.write #= true cmd.write #= true
cmd.load #= false cmd.sync #= false
} }
} }
@ -281,7 +282,7 @@ class FpuTest extends FunSuite{
} }
commitQueue += {cmd => commitQueue += {cmd =>
cmd.write #= true cmd.write #= true
cmd.load #= false cmd.sync #= false
} }
} }
} }
@ -309,11 +310,12 @@ class FpuTest extends FunSuite{
} }
} }
def checkFloat(ref : Float, dut : Float): Boolean ={ def checkFloat(ref : Float, dut : Float): Boolean ={
if(ref.signum != dut.signum) return false if((f2b(ref) & 0x80000000) != (f2b(dut) & 0x80000000)) return false
if(ref == 0.0 && dut == 0.0 && f2b(ref) != f2b(dut)) return false if(ref == 0.0 && dut == 0.0 && f2b(ref) != f2b(dut)) return false
if(ref.isNaN && dut.isNaN) return true if(ref.isNaN && dut.isNaN) return true
if(ref == dut) return true if(ref == dut) return true
if(ref.abs * 1.0001 > dut.abs && ref.abs * 0.9999 < dut.abs && ref.signum == dut.signum) return true if(ref.abs * 1.0001 + Float.MinPositiveValue >= dut.abs && ref.abs * 0.9999 - Float.MinPositiveValue <= dut.abs) return true
// if(ref + Float.MinPositiveValue*2.0f === dut || dut + Float.MinPositiveValue*2.0f === ref)
false false
} }
def checkFloatExact(ref : Float, dut : Float): Boolean ={ def checkFloatExact(ref : Float, dut : Float): Boolean ={
@ -346,6 +348,16 @@ class FpuTest extends FunSuite{
} }
} }
/** Load/store round-trip check for a single float.
  *
  * Picks a random register index in [0, 32), issues `load(rd, a)` and then
  * `storeFloat(rd)`, and asserts in the store callback that the value handed
  * back has the same bit pattern (via `f2b`) as `a`.
  *
  * @param a value to round-trip; used unmodified as the reference
  */
def testLoadStore(a : Float): Unit ={
  val rd = Random.nextInt(32)
  load(rd, a)
  storeFloat(rd){v =>
    // The reference is the input itself; the unused `refUnclamped` copy was dropped.
    val ref = a
    println(f"$a = $v, $ref")
    // Compare bit patterns rather than float values so that +0.0 and -0.0
    // are distinguished.
    assert(f2b(v) == f2b(ref))
  }
}
def testMul(a : Float, b : Float): Unit ={ def testMul(a : Float, b : Float): Unit ={
val rs = new RegAllocator() val rs = new RegAllocator()
val rs1, rs2, rs3 = rs.allocate() val rs1, rs2, rs3 = rs.allocate()
@ -515,7 +527,7 @@ class FpuTest extends FunSuite{
def withMinus(that : Seq[Float]) = that.flatMap(f => List(f, -f)) def withMinus(that : Seq[Float]) = that.flatMap(f => List(f, -f))
val fZeros = withMinus(List(0.0f)) val fZeros = withMinus(List(0.0f))
val fSubnormals = withMinus(List(b2f(0x00000000+1), b2f(0x00000000+2), b2f(0x00800000-2), b2f(0x00800000-1))) val fSubnormals = withMinus(List(b2f(0x00000000+1), b2f(0x00000000+2), b2f(0x00006800), b2f(0x00800000-2), b2f(0x00800000-1)))
val fExpSmall = withMinus(List(b2f(0x00800000), b2f(0x00800000+1), b2f(0x00800000 + 2))) val fExpSmall = withMinus(List(b2f(0x00800000), b2f(0x00800000+1), b2f(0x00800000 + 2)))
val fExpNormal = withMinus(List(b2f(0x3f800000-2), b2f(0x3f800000-1), b2f(0x3f800000), b2f(0x3f800000+1), b2f(0x3f800000+2))) val fExpNormal = withMinus(List(b2f(0x3f800000-2), b2f(0x3f800000-1), b2f(0x3f800000), b2f(0x3f800000+1), b2f(0x3f800000+2)))
val fExpBig = withMinus(List(b2f(0x7f7fffff-2), b2f(0x7f7fffff-1), b2f(0x7f7fffff))) val fExpBig = withMinus(List(b2f(0x7f7fffff-2), b2f(0x7f7fffff-1), b2f(0x7f7fffff)))
@ -533,13 +545,6 @@ class FpuTest extends FunSuite{
testMul(1.2f, 0f)
for(a <- fAll; _ <- 0 until 50) testMul(a, randomFloat())
for(b <- fAll; _ <- 0 until 50) testMul(randomFloat(), b)
for(a <- fAll; b <- fAll) testMul(a, b)
for(_ <- 0 until 1000) testMul(randomFloat(), randomFloat())
testAdd(b2f(0x3f800000), b2f(0x3f800000-1)) testAdd(b2f(0x3f800000), b2f(0x3f800000-1))
testAdd(1.1f, 2.3f) testAdd(1.1f, 2.3f)
testAdd(1.2f, -1.2f) testAdd(1.2f, -1.2f)
@ -555,6 +560,28 @@ class FpuTest extends FunSuite{
for(a <- fAll; b <- fAll) testAdd(a, b) for(a <- fAll; b <- fAll) testAdd(a, b)
for(_ <- 0 until 1000) testAdd(randomFloat(), randomFloat()) for(_ <- 0 until 1000) testAdd(randomFloat(), randomFloat())
testLoadStore(1.2f)
testMul(1.2f, 2.5f)
testMul(b2f(0x00400000), 16.0f)
testMul(b2f(0x00100000), 16.0f)
testMul(b2f(0x00180000), 16.0f)
testMul(b2f(0x00000004), 16.0f)
testMul(b2f(0x00000040), 16.0f)
testMul(b2f(0x00000041), 16.0f)
testMul(b2f(0x00000001), b2f(0x00000001))
testMul(1.0f, b2f(0x00000001))
testMul(0.5f, b2f(0x00000001))
// dut.clockDomain.waitSampling(1000)
// simSuccess()
testMul(1.2f, 0f)
for(a <- fAll; _ <- 0 until 50) testMul(a, randomFloat())
for(b <- fAll; _ <- 0 until 50) testMul(randomFloat(), b)
for(a <- fAll; b <- fAll) testMul(a, b)
for(_ <- 0 until 1000) testMul(randomFloat(), randomFloat())