FPU integration (WIP): Mandelbrot now works in Linux when built without inlining (it crashes when inlined)
This commit is contained in:
parent
8537d18b16
commit
e504afbf18
|
@ -33,7 +33,7 @@ import vexriscv.ip.fpu.FpuParameter
|
||||||
object TestsWorkspace {
|
object TestsWorkspace {
|
||||||
def main(args: Array[String]) {
|
def main(args: Array[String]) {
|
||||||
SpinalConfig().generateVerilog {
|
SpinalConfig().generateVerilog {
|
||||||
// make clean all REDO=10 CSR=no MMU=no COREMARK=no RVF=no REDO=1 DBUS_LOAD_DATA_WIDTH=64 DBUS_STORE_DATA_WIDTH=64 DEBUG=ye TRACE=ye
|
// make clean all REDO=10 CSR=no MMU=no COREMARK=no RVF=yes RVD=yes REDO=1 DBUS_LOAD_DATA_WIDTH=64 DBUS_STORE_DATA_WIDTH=64 DEBUG=ye TRACE=ye
|
||||||
val config = VexRiscvConfig(
|
val config = VexRiscvConfig(
|
||||||
plugins = List(
|
plugins = List(
|
||||||
new IBusCachedPlugin(
|
new IBusCachedPlugin(
|
||||||
|
|
|
@ -15,12 +15,13 @@ import spinal.lib.generator.Handle
|
||||||
import spinal.lib.misc.plic.PlicMapping
|
import spinal.lib.misc.plic.PlicMapping
|
||||||
import spinal.lib.system.debugger.SystemDebuggerConfig
|
import spinal.lib.system.debugger.SystemDebuggerConfig
|
||||||
import vexriscv.ip.{DataCacheAck, DataCacheConfig, DataCacheMemBus, InstructionCache, InstructionCacheConfig}
|
import vexriscv.ip.{DataCacheAck, DataCacheConfig, DataCacheMemBus, InstructionCache, InstructionCacheConfig}
|
||||||
import vexriscv.plugin.{BranchPlugin, CsrAccess, CsrPlugin, CsrPluginConfig, DBusCachedPlugin, DBusSimplePlugin, DYNAMIC_TARGET, DebugPlugin, DecoderSimplePlugin, FullBarrelShifterPlugin, HazardSimplePlugin, IBusCachedPlugin, IBusSimplePlugin, IntAluPlugin, MmuPlugin, MmuPortConfig, MulDivIterativePlugin, MulPlugin, RegFilePlugin, STATIC, SrcPlugin, StaticMemoryTranslatorPlugin, YamlPlugin}
|
import vexriscv.plugin.{BranchPlugin, CsrAccess, CsrPlugin, CsrPluginConfig, DBusCachedPlugin, DBusSimplePlugin, DYNAMIC_TARGET, DebugPlugin, DecoderSimplePlugin, FpuPlugin, FullBarrelShifterPlugin, HazardSimplePlugin, IBusCachedPlugin, IBusSimplePlugin, IntAluPlugin, MmuPlugin, MmuPortConfig, MulDivIterativePlugin, MulPlugin, RegFilePlugin, STATIC, SrcPlugin, StaticMemoryTranslatorPlugin, YamlPlugin}
|
||||||
import vexriscv.{Riscv, VexRiscv, VexRiscvBmbGenerator, VexRiscvConfig, plugin}
|
import vexriscv.{Riscv, VexRiscv, VexRiscvBmbGenerator, VexRiscvConfig, plugin}
|
||||||
|
|
||||||
import scala.collection.mutable
|
import scala.collection.mutable
|
||||||
import scala.collection.mutable.ArrayBuffer
|
import scala.collection.mutable.ArrayBuffer
|
||||||
import spinal.lib.generator._
|
import spinal.lib.generator._
|
||||||
|
import vexriscv.ip.fpu.FpuParameter
|
||||||
|
|
||||||
case class VexRiscvSmpClusterParameter(cpuConfigs : Seq[VexRiscvConfig], withExclusiveAndInvalidation : Boolean, forcePeripheralWidth : Boolean = true, outOfOrderDecoder : Boolean = true)
|
case class VexRiscvSmpClusterParameter(cpuConfigs : Seq[VexRiscvConfig], withExclusiveAndInvalidation : Boolean, forcePeripheralWidth : Boolean = true, outOfOrderDecoder : Boolean = true)
|
||||||
|
|
||||||
|
@ -163,10 +164,15 @@ object VexRiscvSmpClusterGen {
|
||||||
earlyBranch : Boolean = false,
|
earlyBranch : Boolean = false,
|
||||||
dBusCmdMasterPipe : Boolean = false,
|
dBusCmdMasterPipe : Boolean = false,
|
||||||
withMmu : Boolean = true,
|
withMmu : Boolean = true,
|
||||||
withSupervisor : Boolean = true
|
withSupervisor : Boolean = true,
|
||||||
|
withFloat : Boolean = false,
|
||||||
|
withDouble : Boolean = false,
|
||||||
|
externalFpu : Boolean = true
|
||||||
) = {
|
) = {
|
||||||
assert(iCacheSize/iCacheWays <= 4096, "Instruction cache ways can't be bigger than 4096 bytes")
|
assert(iCacheSize/iCacheWays <= 4096, "Instruction cache ways can't be bigger than 4096 bytes")
|
||||||
assert(dCacheSize/dCacheWays <= 4096, "Data cache ways can't be bigger than 4096 bytes")
|
assert(dCacheSize/dCacheWays <= 4096, "Data cache ways can't be bigger than 4096 bytes")
|
||||||
|
assert(!(withDouble && !withFloat))
|
||||||
|
|
||||||
val config = VexRiscvConfig(
|
val config = VexRiscvConfig(
|
||||||
plugins = List(
|
plugins = List(
|
||||||
if(withMmu)new MmuPlugin(
|
if(withMmu)new MmuPlugin(
|
||||||
|
@ -262,7 +268,7 @@ object VexRiscvSmpClusterGen {
|
||||||
mulUnrollFactor = 32,
|
mulUnrollFactor = 32,
|
||||||
divUnrollFactor = 1
|
divUnrollFactor = 1
|
||||||
),
|
),
|
||||||
new CsrPlugin(CsrPluginConfig.openSbi(mhartid = hartId, misa = Riscv.misaToInt("imas")).copy(utimeAccess = CsrAccess.READ_ONLY)),
|
new CsrPlugin(CsrPluginConfig.openSbi(mhartid = hartId, misa = Riscv.misaToInt(s"ima${if(withFloat) "f" else ""}${if(withDouble) "d" else ""}s")).copy(utimeAccess = CsrAccess.READ_ONLY)),
|
||||||
new BranchPlugin(
|
new BranchPlugin(
|
||||||
earlyBranch = earlyBranch,
|
earlyBranch = earlyBranch,
|
||||||
catchAddressMisaligned = true,
|
catchAddressMisaligned = true,
|
||||||
|
@ -271,6 +277,11 @@ object VexRiscvSmpClusterGen {
|
||||||
new YamlPlugin(s"cpu$hartId.yaml")
|
new YamlPlugin(s"cpu$hartId.yaml")
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if(withFloat) config.plugins += new FpuPlugin(
|
||||||
|
externalFpu = true,
|
||||||
|
p = FpuParameter(withDouble = withDouble)
|
||||||
|
)
|
||||||
config
|
config
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -675,7 +675,7 @@ class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParam
|
||||||
|
|
||||||
val rspSync = True
|
val rspSync = True
|
||||||
val rspLast = True
|
val rspLast = True
|
||||||
val memCmdSent = RegInit(False) setWhen (io.mem.cmd.ready) clearWhen (!io.cpu.writeBack.isStuck)
|
val memCmdSent = RegInit(False) setWhen (io.mem.cmd.fire) clearWhen (!io.cpu.writeBack.isStuck)
|
||||||
val pending = withExclusive generate new Area{
|
val pending = withExclusive generate new Area{
|
||||||
val counter = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0)
|
val counter = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0)
|
||||||
val counterNext = counter + U(io.mem.cmd.fire && io.mem.cmd.last) - ((io.mem.rsp.valid && io.mem.rsp.last) ? (io.mem.rsp.aggregated +^ 1) | 0)
|
val counterNext = counter + U(io.mem.cmd.fire && io.mem.cmd.last) - ((io.mem.rsp.valid && io.mem.rsp.last) ? (io.mem.rsp.aggregated +^ 1) | 0)
|
||||||
|
|
|
@ -207,7 +207,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
arbiterOutput.source := arbiter.io.chosen
|
arbiterOutput.source := arbiter.io.chosen
|
||||||
arbiterOutput.payload.assignSomeByName(arbiter.io.output.payload)
|
arbiterOutput.payload.assignSomeByName(arbiter.io.output.payload)
|
||||||
|
|
||||||
val s0 = arbiterOutput.pipelined(m2s = true, s2m = true)
|
val s0 = arbiterOutput.pipelined(m2s = true, s2m = true) //TODO may need to remove m2s for store latency
|
||||||
val useRs1, useRs2, useRs3, useRd = False
|
val useRs1, useRs2, useRs3, useRd = False
|
||||||
switch(s0.opcode){
|
switch(s0.opcode){
|
||||||
is(p.Opcode.LOAD) { useRd := True }
|
is(p.Opcode.LOAD) { useRd := True }
|
||||||
|
@ -287,28 +287,28 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
}
|
}
|
||||||
|
|
||||||
val decode = new Area{
|
val decode = new Area{
|
||||||
val input = read.output.combStage()
|
val input = read.output/*.s2mPipe()*/.combStage()
|
||||||
input.ready := False
|
input.ready := False
|
||||||
|
|
||||||
val loadHit = List(FpuOpcode.LOAD, FpuOpcode.FMV_W_X, FpuOpcode.I2F).map(input.opcode === _).orR
|
val loadHit = List(FpuOpcode.LOAD, FpuOpcode.FMV_W_X, FpuOpcode.I2F).map(input.opcode === _).orR
|
||||||
val load = Stream(LoadInput())
|
val load = Stream(LoadInput())
|
||||||
load.valid := input.valid && loadHit
|
load.valid := input.valid && loadHit
|
||||||
input.ready setWhen(loadHit && load.ready)
|
input.ready setWhen(loadHit && load.ready)
|
||||||
load.payload.assignSomeByName(read.output.payload)
|
load.payload.assignSomeByName(input.payload)
|
||||||
load.i2f := input.opcode === FpuOpcode.I2F
|
load.i2f := input.opcode === FpuOpcode.I2F
|
||||||
|
|
||||||
val shortPipHit = List(FpuOpcode.STORE, FpuOpcode.F2I, FpuOpcode.CMP, FpuOpcode.MIN_MAX, FpuOpcode.SGNJ, FpuOpcode.FMV_X_W, FpuOpcode.FCLASS, FpuOpcode.FCVT_X_X).map(input.opcode === _).orR
|
val shortPipHit = List(FpuOpcode.STORE, FpuOpcode.F2I, FpuOpcode.CMP, FpuOpcode.MIN_MAX, FpuOpcode.SGNJ, FpuOpcode.FMV_X_W, FpuOpcode.FCLASS, FpuOpcode.FCVT_X_X).map(input.opcode === _).orR
|
||||||
val shortPip = Stream(ShortPipInput())
|
val shortPip = Stream(ShortPipInput())
|
||||||
input.ready setWhen(shortPipHit && shortPip.ready)
|
input.ready setWhen(shortPipHit && shortPip.ready)
|
||||||
shortPip.valid := input.valid && shortPipHit
|
shortPip.valid := input.valid && shortPipHit
|
||||||
shortPip.payload.assignSomeByName(read.output.payload)
|
shortPip.payload.assignSomeByName(input.payload)
|
||||||
|
|
||||||
val divSqrtHit = input.opcode === p.Opcode.DIV || input.opcode === p.Opcode.SQRT
|
val divSqrtHit = input.opcode === p.Opcode.DIV || input.opcode === p.Opcode.SQRT
|
||||||
val divSqrt = Stream(DivSqrtInput())
|
val divSqrt = Stream(DivSqrtInput())
|
||||||
if(p.withDivSqrt) {
|
if(p.withDivSqrt) {
|
||||||
input.ready setWhen (divSqrtHit && divSqrt.ready)
|
input.ready setWhen (divSqrtHit && divSqrt.ready)
|
||||||
divSqrt.valid := input.valid && divSqrtHit
|
divSqrt.valid := input.valid && divSqrtHit
|
||||||
divSqrt.payload.assignSomeByName(read.output.payload)
|
divSqrt.payload.assignSomeByName(input.payload)
|
||||||
divSqrt.div := input.opcode === p.Opcode.DIV
|
divSqrt.div := input.opcode === p.Opcode.DIV
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -324,15 +324,15 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
divSqrtToMul.ready := mul.ready
|
divSqrtToMul.ready := mul.ready
|
||||||
mul.payload := divSqrtToMul.payload
|
mul.payload := divSqrtToMul.payload
|
||||||
when(!divSqrtToMul.valid) {
|
when(!divSqrtToMul.valid) {
|
||||||
mul.payload.assignSomeByName(read.output.payload)
|
mul.payload.assignSomeByName(input.payload)
|
||||||
mul.add := fmaHit
|
mul.add := fmaHit
|
||||||
mul.divSqrt := False
|
mul.divSqrt := False
|
||||||
mul.msb1 := True
|
mul.msb1 := True
|
||||||
mul.msb2 := True
|
mul.msb2 := True
|
||||||
mul.rs2.sign.allowOverride();
|
mul.rs2.sign.allowOverride();
|
||||||
mul.rs2.sign := read.output.rs2.sign ^ input.arg(0)
|
mul.rs2.sign := input.rs2.sign ^ input.arg(0)
|
||||||
mul.rs3.sign.allowOverride();
|
mul.rs3.sign.allowOverride();
|
||||||
mul.rs3.sign := read.output.rs3.sign ^ input.arg(1)
|
mul.rs3.sign := input.rs3.sign ^ input.arg(1)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -348,9 +348,9 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
mulToAdd.ready := add.ready
|
mulToAdd.ready := add.ready
|
||||||
add.payload := mulToAdd.payload
|
add.payload := mulToAdd.payload
|
||||||
when(!mulToAdd.valid) {
|
when(!mulToAdd.valid) {
|
||||||
add.payload.assignSomeByName(read.output.payload)
|
add.payload.assignSomeByName(input.payload)
|
||||||
add.rs2.sign.allowOverride;
|
add.rs2.sign.allowOverride;
|
||||||
add.rs2.sign := read.output.rs2.sign ^ input.arg(0)
|
add.rs2.sign := input.rs2.sign ^ input.arg(0)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -578,7 +578,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val input = UInt(p.internalMantissaSize+1 max 33 bits).assignDontCare()
|
val input = UInt(p.internalMantissaSize+1 max 33 bits).assignDontCare()
|
||||||
var logic = input
|
var logic = input
|
||||||
val scrap = Reg(Bool)
|
val scrap = Reg(Bool)
|
||||||
for(i <- by.range){
|
for(i <- by.range.reverse){
|
||||||
scrap setWhen(by(i) && logic(0, 1 << i bits) =/= 0)
|
scrap setWhen(by(i) && logic(0, 1 << i bits) =/= 0)
|
||||||
logic \= by(i) ? (logic |>> (BigInt(1) << i)) | logic
|
logic \= by(i) ? (logic |>> (BigInt(1) << i)) | logic
|
||||||
}
|
}
|
||||||
|
@ -809,11 +809,13 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
List(FpuOpcode.CMP, FpuOpcode.MIN_MAX).map(input.opcode === _).orR && rs2NanNv
|
List(FpuOpcode.CMP, FpuOpcode.MIN_MAX).map(input.opcode === _).orR && rs2NanNv
|
||||||
flag.NV setWhen(input.valid && nv)
|
flag.NV setWhen(input.valid && nv)
|
||||||
|
|
||||||
input.ready := !halt && (toFpuRf ? rfOutput.ready | io.port.map(_.rsp.ready).read(input.source))
|
val rspStreams = Vec(Stream(FpuRsp(p)), portCount)
|
||||||
|
input.ready := !halt && (toFpuRf ? rfOutput.ready | rspStreams.map(_.ready).read(input.source))
|
||||||
for(i <- 0 until portCount){
|
for(i <- 0 until portCount){
|
||||||
def rsp = io.port(i).rsp
|
def rsp = rspStreams(i)
|
||||||
rsp.valid := input.valid && input.source === i && !toFpuRf && !halt
|
rsp.valid := input.valid && input.source === i && !toFpuRf && !halt
|
||||||
rsp.value := result
|
rsp.value := result
|
||||||
|
io.port(i).rsp << rsp.stage()
|
||||||
completion(i).increments += (RegNext(rsp.fire) init(False))
|
completion(i).increments += (RegNext(rsp.fire) init(False))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -940,7 +942,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
}
|
}
|
||||||
|
|
||||||
val divSqrt = p.withDivSqrt generate new Area {
|
val divSqrt = p.withDivSqrt generate new Area {
|
||||||
val input = decode.divSqrt.stage()
|
val input = decode.divSqrt.halfPipe()
|
||||||
|
|
||||||
val aproxWidth = 8
|
val aproxWidth = 8
|
||||||
val aproxDepth = 64
|
val aproxDepth = 64
|
||||||
|
@ -1142,7 +1144,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val rs1ExponentEqual = input.rs1.exponent === input.rs2.exponent
|
val rs1ExponentEqual = input.rs1.exponent === input.rs2.exponent
|
||||||
val rs1MantissaBigger = input.rs1.mantissa > input.rs2.mantissa
|
val rs1MantissaBigger = input.rs1.mantissa > input.rs2.mantissa
|
||||||
val absRs1Bigger = ((rs1ExponentBigger || rs1ExponentEqual && rs1MantissaBigger) && !input.rs1.isZero || input.rs1.isInfinity) && !input.rs2.isInfinity
|
val absRs1Bigger = ((rs1ExponentBigger || rs1ExponentEqual && rs1MantissaBigger) && !input.rs1.isZero || input.rs1.isInfinity) && !input.rs2.isInfinity
|
||||||
val shiftBy = rs1ExponentBigger ? (0-exp21) | exp21
|
val shiftBy = exp21.asSInt.abs//rs1ExponentBigger ? (0-exp21) | exp21
|
||||||
val shiftOverflow = (shiftBy >= p.internalMantissaSize+3)
|
val shiftOverflow = (shiftBy >= p.internalMantissaSize+3)
|
||||||
val passThrough = shiftOverflow || (input.rs1.isZero) || (input.rs2.isZero)
|
val passThrough = shiftOverflow || (input.rs1.isZero) || (input.rs2.isZero)
|
||||||
|
|
||||||
|
@ -1153,8 +1155,8 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val xMantissa = U"1" @@ (rs1ExponentBigger ? input.rs1.mantissa | input.rs2.mantissa) @@ U"00"
|
val xMantissa = U"1" @@ (rs1ExponentBigger ? input.rs1.mantissa | input.rs2.mantissa) @@ U"00"
|
||||||
val yMantissaUnshifted = U"1" @@ (rs1ExponentBigger ? input.rs2.mantissa | input.rs1.mantissa) @@ U"00"
|
val yMantissaUnshifted = U"1" @@ (rs1ExponentBigger ? input.rs2.mantissa | input.rs1.mantissa) @@ U"00"
|
||||||
var yMantissa = CombInit(yMantissaUnshifted)
|
var yMantissa = CombInit(yMantissaUnshifted)
|
||||||
val roundingScrap = CombInit(shiftOverflow)
|
val roundingScrap = False
|
||||||
for(i <- 0 until log2Up(p.internalMantissaSize)){
|
for(i <- log2Up(p.internalMantissaSize) - 1 downto 0){
|
||||||
roundingScrap setWhen(shiftBy(i) && yMantissa(0, 1 << i bits) =/= 0)
|
roundingScrap setWhen(shiftBy(i) && yMantissa(0, 1 << i bits) =/= 0)
|
||||||
yMantissa \= shiftBy(i) ? (yMantissa |>> (BigInt(1) << i)) | yMantissa
|
yMantissa \= shiftBy(i) ? (yMantissa |>> (BigInt(1) << i)) | yMantissa
|
||||||
}
|
}
|
||||||
|
@ -1181,6 +1183,25 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val xSigned = xMantissa.twoComplement(xSign) //TODO Is that necessary ?
|
val xSigned = xMantissa.twoComplement(xSign) //TODO Is that necessary ?
|
||||||
val ySigned = ((ySign ## Mux(ySign, ~yMantissa, yMantissa)).asUInt + (ySign && !roundingScrap).asUInt).asSInt //rounding here
|
val ySigned = ((ySign ## Mux(ySign, ~yMantissa, yMantissa)).asUInt + (ySign && !roundingScrap).asUInt).asSInt //rounding here
|
||||||
output.xyMantissa := U(xSigned +^ ySigned).trim(1 bits)
|
output.xyMantissa := U(xSigned +^ ySigned).trim(1 bits)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
class OhOutput extends MathOutput{
|
||||||
|
// val shiftOh = Vec(Bool, p.internalMantissaSize+4)
|
||||||
|
val shift = UInt(log2Up(p.internalMantissaSize+4) bits)
|
||||||
|
}
|
||||||
|
|
||||||
|
val oh = new Area {
|
||||||
|
val input = math.output.stage()
|
||||||
|
val output = input.swapPayload(new OhOutput)
|
||||||
|
output.payload.assignSomeByName(input.payload)
|
||||||
|
import input.payload._
|
||||||
|
|
||||||
|
val shiftOh = OHMasking.first(output.xyMantissa.asBools.reverse) //The OHMasking.first can be processed in parallel to the xyMantissa carry chain
|
||||||
|
// output.shiftOh := shiftOh
|
||||||
|
|
||||||
|
val shift = OHToUInt(shiftOh)
|
||||||
|
output.shift := shift
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -1193,13 +1214,11 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
}
|
}
|
||||||
|
|
||||||
val norm = new Area{
|
val norm = new Area{
|
||||||
val input = math.output.stage()
|
val input = oh.output.stage()
|
||||||
val output = input.swapPayload(new NormOutput)
|
val output = input.swapPayload(new NormOutput)
|
||||||
output.payload.assignSomeByName(input.payload)
|
output.payload.assignSomeByName(input.payload)
|
||||||
import input.payload._
|
import input.payload._
|
||||||
|
|
||||||
val shiftOh = OHMasking.first(xyMantissa.asBools.reverse)
|
|
||||||
val shift = OHToUInt(shiftOh)
|
|
||||||
output.mantissa := (xyMantissa |<< shift)
|
output.mantissa := (xyMantissa |<< shift)
|
||||||
output.exponent := xyExponent -^ shift + 1
|
output.exponent := xyExponent -^ shift + 1
|
||||||
output.forceInfinity := (input.rs1.isInfinity || input.rs2.isInfinity)
|
output.forceInfinity := (input.rs1.isInfinity || input.rs2.isInfinity)
|
||||||
|
@ -1210,7 +1229,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
}
|
}
|
||||||
|
|
||||||
val result = new Area {
|
val result = new Area {
|
||||||
val input = norm.output.stage()
|
val input = norm.output.pipelined()
|
||||||
val output = input.swapPayload(new MergeInput())
|
val output = input.swapPayload(new MergeInput())
|
||||||
import input.payload._
|
import input.payload._
|
||||||
|
|
||||||
|
@ -1251,7 +1270,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
inputs += load.s1.output.stage()
|
inputs += load.s1.output.stage()
|
||||||
if(p.withAdd) (inputs += add.result.output)
|
if(p.withAdd) (inputs += add.result.output)
|
||||||
if(p.withMul) (inputs += mul.result.output)
|
if(p.withMul) (inputs += mul.result.output)
|
||||||
if(p.withShortPipMisc) (inputs += shortPip.rfOutput)
|
if(p.withShortPipMisc) (inputs += shortPip.rfOutput.pipelined(m2s = true))
|
||||||
val arbitrated = StreamArbiterFactory.lowerFirst.noLock.on(inputs)
|
val arbitrated = StreamArbiterFactory.lowerFirst.noLock.on(inputs)
|
||||||
val isCommited = rf.lock.map(_.commited).read(arbitrated.lockId)
|
val isCommited = rf.lock.map(_.commited).read(arbitrated.lockId)
|
||||||
val commited = arbitrated.haltWhen(!isCommited).toFlow
|
val commited = arbitrated.haltWhen(!isCommited).toFlow
|
||||||
|
@ -1301,7 +1320,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val mantissaRange = p.internalMantissaSize downto 1
|
val mantissaRange = p.internalMantissaSize downto 1
|
||||||
val adderMantissa = input.value.mantissa(mantissaRange) & (mantissaIncrement ? ~(exactMask.trim(1) >> 1) | input.value.mantissa(mantissaRange).maxValue)
|
val adderMantissa = input.value.mantissa(mantissaRange) & (mantissaIncrement ? ~(exactMask.trim(1) >> 1) | input.value.mantissa(mantissaRange).maxValue)
|
||||||
val adderRightOp = (mantissaIncrement ? (exactMask >> 1)| U(0)).resize(p.internalMantissaSize bits)
|
val adderRightOp = (mantissaIncrement ? (exactMask >> 1)| U(0)).resize(p.internalMantissaSize bits)
|
||||||
val adder = (input.value.exponent @@ adderMantissa) + adderRightOp + U(mantissaIncrement)
|
val adder = KeepAttribute(KeepAttribute(input.value.exponent @@ adderMantissa) + KeepAttribute(adderRightOp) + KeepAttribute(U(mantissaIncrement)))
|
||||||
math.special := input.value.special
|
math.special := input.value.special
|
||||||
math.sign := input.value.sign
|
math.sign := input.value.sign
|
||||||
math.exponent := adder(p.internalMantissaSize, p.internalExponentSize bits)
|
math.exponent := adder(p.internalMantissaSize, p.internalExponentSize bits)
|
||||||
|
|
|
@ -138,6 +138,12 @@ case class FpuFlags() extends Bundle{
|
||||||
case class FpuCompletion() extends Bundle{
|
case class FpuCompletion() extends Bundle{
|
||||||
val flag = FpuFlags()
|
val flag = FpuFlags()
|
||||||
val count = UInt(2 bits)
|
val count = UInt(2 bits)
|
||||||
|
|
||||||
|
def stage() = {
|
||||||
|
val ret = FpuCompletion().setCompositeName(this, "stage", true)
|
||||||
|
ret := this
|
||||||
|
ret
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
case class FpuCmd(p : FpuParameter) extends Bundle{
|
case class FpuCmd(p : FpuParameter) extends Bundle{
|
||||||
|
|
|
@ -72,6 +72,9 @@ class DBusCachedPlugin(val config : DataCacheConfig,
|
||||||
MEMORY_WR -> False
|
MEMORY_WR -> False
|
||||||
) ++ (if(catchSomething) List(HAS_SIDE_EFFECT -> True) else Nil)
|
) ++ (if(catchSomething) List(HAS_SIDE_EFFECT -> True) else Nil)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if(withLrSc) decoderService.add(key, Seq(MEMORY_LRSC -> False))
|
||||||
|
if(withAmo) decoderService.add(key, Seq(MEMORY_AMO -> False))
|
||||||
}
|
}
|
||||||
override def addStoreWordEncoding(key : MaskedLiteral): Unit = {
|
override def addStoreWordEncoding(key : MaskedLiteral): Unit = {
|
||||||
val decoderService = pipeline.service(classOf[DecoderService])
|
val decoderService = pipeline.service(classOf[DecoderService])
|
||||||
|
@ -91,6 +94,9 @@ class DBusCachedPlugin(val config : DataCacheConfig,
|
||||||
MEMORY_WR -> True
|
MEMORY_WR -> True
|
||||||
) ++ (if(catchSomething) List(HAS_SIDE_EFFECT -> True) else Nil)
|
) ++ (if(catchSomething) List(HAS_SIDE_EFFECT -> True) else Nil)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if(withLrSc) decoderService.add(key, Seq(MEMORY_LRSC -> False))
|
||||||
|
if(withAmo) decoderService.add(key, Seq(MEMORY_AMO -> False))
|
||||||
}
|
}
|
||||||
|
|
||||||
val bypassStoreList = ArrayBuffer[(Bool, Bits)]()
|
val bypassStoreList = ArrayBuffer[(Bool, Bits)]()
|
||||||
|
@ -501,6 +507,7 @@ class DBusCachedPlugin(val config : DataCacheConfig,
|
||||||
dBusAccess.rsp.error := cache.io.cpu.writeBack.unalignedAccess || cache.io.cpu.writeBack.accessError
|
dBusAccess.rsp.error := cache.io.cpu.writeBack.unalignedAccess || cache.io.cpu.writeBack.accessError
|
||||||
dBusAccess.rsp.redo := cache.io.cpu.redo
|
dBusAccess.rsp.redo := cache.io.cpu.redo
|
||||||
component.addPrePopTask{() =>
|
component.addPrePopTask{() =>
|
||||||
|
managementStage.input(IS_DBUS_SHARING).getDrivingReg clearWhen(dBusAccess.rsp.fire)
|
||||||
when(forceDatapath){
|
when(forceDatapath){
|
||||||
execute.output(REGFILE_WRITE_DATA) := dBusAccess.cmd.address.asBits
|
execute.output(REGFILE_WRITE_DATA) := dBusAccess.cmd.address.asBits
|
||||||
}
|
}
|
||||||
|
|
|
@ -157,13 +157,13 @@ class FpuPlugin(externalFpu : Boolean = false,
|
||||||
import pipeline.config._
|
import pipeline.config._
|
||||||
import Riscv._
|
import Riscv._
|
||||||
|
|
||||||
val internal = !externalFpu generate pipeline plug new Area{
|
val internal = (!externalFpu).generate (pipeline plug new Area{
|
||||||
val fpu = FpuCore(1, p)
|
val fpu = FpuCore(1, p)
|
||||||
fpu.io.port(0).cmd << port.cmd
|
fpu.io.port(0).cmd << port.cmd
|
||||||
fpu.io.port(0).commit << port.commit
|
fpu.io.port(0).commit << port.commit
|
||||||
fpu.io.port(0).rsp >> port.rsp
|
fpu.io.port(0).rsp >> port.rsp
|
||||||
fpu.io.port(0).completion <> port.completion
|
fpu.io.port(0).completion <> port.completion
|
||||||
}
|
})
|
||||||
|
|
||||||
|
|
||||||
val csr = pipeline plug new Area{
|
val csr = pipeline plug new Area{
|
||||||
|
@ -195,6 +195,7 @@ class FpuPlugin(externalFpu : Boolean = false,
|
||||||
fs := 3 //DIRTY
|
fs := 3 //DIRTY
|
||||||
}
|
}
|
||||||
service.rw(CSR.SSTATUS, 13, fs)
|
service.rw(CSR.SSTATUS, 13, fs)
|
||||||
|
service.rw(CSR.MSTATUS, 13, fs)
|
||||||
}
|
}
|
||||||
|
|
||||||
decode plug new Area{
|
decode plug new Area{
|
||||||
|
@ -259,7 +260,7 @@ class FpuPlugin(externalFpu : Boolean = false,
|
||||||
commit.write := arbitration.isValid && !arbitration.removeIt
|
commit.write := arbitration.isValid && !arbitration.removeIt
|
||||||
commit.sync := input(FPU_COMMIT_SYNC)
|
commit.sync := input(FPU_COMMIT_SYNC)
|
||||||
|
|
||||||
when(arbitration.isValid && !commit.ready){
|
when(isCommit && !commit.ready){
|
||||||
arbitration.haltByOther := True
|
arbitration.haltByOther := True
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue