FPU now tracks commits using a counter per pipeline per port
This commit is contained in:
parent
81c193af1f
commit
636d53cf63
|
@ -26,7 +26,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val exponentF64Infinity = exponentOne+1023+1
|
val exponentF64Infinity = exponentOne+1023+1
|
||||||
|
|
||||||
|
|
||||||
val lockIdType = HardType(UInt(log2Up(p.rfLockCount) bits))
|
|
||||||
|
|
||||||
def whenDouble(format : FpuFormat.C)(yes : => Unit)(no : => Unit): Unit ={
|
def whenDouble(format : FpuFormat.C)(yes : => Unit)(no : => Unit): Unit ={
|
||||||
if(p.withDouble) when(format === FpuFormat.DOUBLE) { yes } otherwise{ no }
|
if(p.withDouble) when(format === FpuFormat.DOUBLE) { yes } otherwise{ no }
|
||||||
|
@ -51,7 +50,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
case class RfReadOutput() extends Bundle{
|
case class RfReadOutput() extends Bundle{
|
||||||
val source = Source()
|
val source = Source()
|
||||||
val opcode = p.Opcode()
|
val opcode = p.Opcode()
|
||||||
val lockId = lockIdType()
|
|
||||||
val rs1, rs2, rs3 = p.internalFloating()
|
val rs1, rs2, rs3 = p.internalFloating()
|
||||||
val rd = p.rfAddress()
|
val rd = p.rfAddress()
|
||||||
val arg = p.Arg()
|
val arg = p.Arg()
|
||||||
|
@ -64,7 +62,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
case class LoadInput() extends Bundle{
|
case class LoadInput() extends Bundle{
|
||||||
val source = Source()
|
val source = Source()
|
||||||
val rd = p.rfAddress()
|
val rd = p.rfAddress()
|
||||||
val lockId = lockIdType()
|
|
||||||
val i2f = Bool()
|
val i2f = Bool()
|
||||||
val arg = Bits(2 bits)
|
val arg = Bits(2 bits)
|
||||||
val roundMode = FpuRoundMode()
|
val roundMode = FpuRoundMode()
|
||||||
|
@ -75,7 +72,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val source = Source()
|
val source = Source()
|
||||||
val opcode = p.Opcode()
|
val opcode = p.Opcode()
|
||||||
val rs1, rs2 = p.internalFloating()
|
val rs1, rs2 = p.internalFloating()
|
||||||
val lockId = lockIdType()
|
|
||||||
val rd = p.rfAddress()
|
val rd = p.rfAddress()
|
||||||
val value = Bits(32 bits)
|
val value = Bits(32 bits)
|
||||||
val arg = Bits(2 bits)
|
val arg = Bits(2 bits)
|
||||||
|
@ -88,7 +84,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val source = Source()
|
val source = Source()
|
||||||
val rs1, rs2, rs3 = p.internalFloating()
|
val rs1, rs2, rs3 = p.internalFloating()
|
||||||
val rd = p.rfAddress()
|
val rd = p.rfAddress()
|
||||||
val lockId = lockIdType()
|
|
||||||
val add = Bool()
|
val add = Bool()
|
||||||
val divSqrt = Bool()
|
val divSqrt = Bool()
|
||||||
val msb1, msb2 = Bool() //allow usage of msb bits of mul
|
val msb1, msb2 = Bool() //allow usage of msb bits of mul
|
||||||
|
@ -101,7 +96,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val source = Source()
|
val source = Source()
|
||||||
val rs1, rs2 = p.internalFloating()
|
val rs1, rs2 = p.internalFloating()
|
||||||
val rd = p.rfAddress()
|
val rd = p.rfAddress()
|
||||||
val lockId = lockIdType()
|
|
||||||
val div = Bool()
|
val div = Bool()
|
||||||
val roundMode = FpuRoundMode()
|
val roundMode = FpuRoundMode()
|
||||||
val format = p.withDouble generate FpuFormat()
|
val format = p.withDouble generate FpuFormat()
|
||||||
|
@ -111,7 +105,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val source = Source()
|
val source = Source()
|
||||||
val rs1, rs2 = p.internalFloating()
|
val rs1, rs2 = p.internalFloating()
|
||||||
val rd = p.rfAddress()
|
val rd = p.rfAddress()
|
||||||
val lockId = lockIdType()
|
|
||||||
val roundMode = FpuRoundMode()
|
val roundMode = FpuRoundMode()
|
||||||
val format = p.withDouble generate FpuFormat()
|
val format = p.withDouble generate FpuFormat()
|
||||||
}
|
}
|
||||||
|
@ -121,7 +114,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val source = Source()
|
val source = Source()
|
||||||
val rs1 = p.internalFloating()
|
val rs1 = p.internalFloating()
|
||||||
val rd = p.rfAddress()
|
val rd = p.rfAddress()
|
||||||
val lockId = lockIdType()
|
|
||||||
val roundMode = FpuRoundMode()
|
val roundMode = FpuRoundMode()
|
||||||
val format = p.withDouble generate FpuFormat()
|
val format = p.withDouble generate FpuFormat()
|
||||||
}
|
}
|
||||||
|
@ -131,15 +123,14 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val source = Source()
|
val source = Source()
|
||||||
val rs1, rs2 = p.internalFloating()
|
val rs1, rs2 = p.internalFloating()
|
||||||
val rd = p.rfAddress()
|
val rd = p.rfAddress()
|
||||||
val lockId = lockIdType()
|
|
||||||
val roundMode = FpuRoundMode()
|
val roundMode = FpuRoundMode()
|
||||||
val format = p.withDouble generate FpuFormat()
|
val format = p.withDouble generate FpuFormat()
|
||||||
|
val needCommit = Bool()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class MergeInput() extends Bundle{
|
class MergeInput() extends Bundle{
|
||||||
val source = Source()
|
val source = Source()
|
||||||
val lockId = lockIdType()
|
|
||||||
val rd = p.rfAddress()
|
val rd = p.rfAddress()
|
||||||
val value = p.writeFloating()
|
val value = p.writeFloating()
|
||||||
val scrap = Bool()
|
val scrap = Bool()
|
||||||
|
@ -151,7 +142,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
|
|
||||||
case class RoundOutput() extends Bundle{
|
case class RoundOutput() extends Bundle{
|
||||||
val source = Source()
|
val source = Source()
|
||||||
val lockId = lockIdType()
|
|
||||||
val rd = p.rfAddress()
|
val rd = p.rfAddress()
|
||||||
val value = p.internalFloating()
|
val value = p.internalFloating()
|
||||||
val format = p.withDouble generate FpuFormat()
|
val format = p.withDouble generate FpuFormat()
|
||||||
|
@ -165,16 +155,28 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val boxed = p.withDouble generate Bool()
|
val boxed = p.withDouble generate Bool()
|
||||||
}
|
}
|
||||||
val ram = Mem(Entry(), 32*portCount)
|
val ram = Mem(Entry(), 32*portCount)
|
||||||
val lock = for(i <- 0 until p.rfLockCount) yield new Area{
|
|
||||||
val valid = RegInit(False)
|
val init = new Area{
|
||||||
val source = Reg(Source())
|
val counter = Reg(UInt(6 bits)) init(0)
|
||||||
val address = Reg(p.rfAddress)
|
val done = CombInit(counter.msb)
|
||||||
val id = Reg(UInt(log2Up(p.rfLockCount+1) bits))
|
when(!done){
|
||||||
val commited = Reg(Bool)
|
counter := counter + 1
|
||||||
val write = Reg(Bool)
|
|
||||||
}
|
}
|
||||||
val lockFree = !lock.map(_.valid).andR
|
def apply(port : Flow[MemWriteCmd[Bool]]) = {
|
||||||
val lockFreeId = OHMasking.first(lock.map(!_.valid))
|
port.valid := !done
|
||||||
|
port.address := counter.resized
|
||||||
|
port.data := False
|
||||||
|
port
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
val scoreboards = Array.fill(portCount)(new Area{
|
||||||
|
val target, hit = Mem(Bool, 32) // XOR
|
||||||
|
val writes = Mem(Bool, 32)
|
||||||
|
|
||||||
|
val targetWrite = init(target.writePort)
|
||||||
|
val hitWrite = init(hit.writePort)
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// val completion = for(source <- 0 until portCount) yield new Area{
|
// val completion = for(source <- 0 until portCount) yield new Area{
|
||||||
|
@ -202,39 +204,42 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
val commitLogic = for(source <- 0 until portCount) yield new Area{
|
class Tracker(width : Int) extends Area{
|
||||||
val fire = False
|
val counter = Reg(UInt(width bits)) init(0)
|
||||||
val target, hit = Reg(UInt(log2Up(p.rfLockCount+1) bits)) init(0)
|
val full = counter.andR
|
||||||
val full = target + 1 === hit
|
val notEmpty = counter.orR
|
||||||
when(fire){
|
val inc = False
|
||||||
hit := hit + 1
|
val dec = False
|
||||||
|
counter := counter + U(inc) - U(dec)
|
||||||
}
|
}
|
||||||
|
|
||||||
commitFork.commit(source).ready := False
|
class CommitArea(source : Int) extends Area{
|
||||||
when(commitFork.commit(source).valid) {
|
val add, mul, div, sqrt, short = new Tracker(4)
|
||||||
for (lock <- rf.lock) {
|
val input = commitFork.commit(source).haltWhen(List(add, mul, div, sqrt, short).map(_.full).orR).toFlow
|
||||||
when(lock.valid && lock.source === source && lock.id === hit && !lock.commited) {
|
|
||||||
fire := True
|
when(input.fire){
|
||||||
lock.commited := True
|
add.inc setWhen(List(FpuOpcode.ADD).map(input.opcode === _).orR)
|
||||||
lock.write := commitFork.commit(source).write
|
mul.inc setWhen(List(FpuOpcode.MUL, FpuOpcode.FMA).map(input.opcode === _).orR)
|
||||||
commitFork.commit(source).ready := True
|
div.inc setWhen(List(FpuOpcode.DIV).map(input.opcode === _).orR)
|
||||||
}
|
sqrt.inc setWhen(List(FpuOpcode.SQRT).map(input.opcode === _).orR)
|
||||||
}
|
short.inc setWhen(List(FpuOpcode.SGNJ, FpuOpcode.MIN_MAX, FpuOpcode.FCVT_X_X).map(input.opcode === _).orR)
|
||||||
|
rf.scoreboards(source).writes(input.rd) := input.write
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
val read = new Area{
|
val commitLogic = for(source <- 0 until portCount) yield new CommitArea(source)
|
||||||
val arbiter = StreamArbiterFactory.noLock.roundRobin.build(FpuCmd(p), portCount)
|
|
||||||
arbiter.io.inputs <> Vec(io.port.map(_.cmd))
|
|
||||||
|
|
||||||
val arbiterOutput = Stream(RfReadInput())
|
def commitConsume(what : CommitArea => Tracker, source : UInt, fire : Bool) : Bool = {
|
||||||
arbiterOutput.arbitrationFrom(arbiter.io.output)
|
for(i <- 0 until portCount) what(commitLogic(i)).dec setWhen(fire && source === i)
|
||||||
arbiterOutput.source := arbiter.io.chosen
|
commitLogic.map(what(_).notEmpty).read(source)
|
||||||
arbiterOutput.payload.assignSomeByName(arbiter.io.output.payload)
|
}
|
||||||
|
|
||||||
val s0 = arbiterOutput.pipelined(m2s = true, s2m = true) //TODO may need to remove m2s for store latency
|
|
||||||
|
val scheduler = for(portId <- 0 until portCount;
|
||||||
|
scoreboard = rf.scoreboards(portId)) yield new Area{
|
||||||
|
val input = io.port(portId).cmd.combStage()
|
||||||
val useRs1, useRs2, useRs3, useRd = False
|
val useRs1, useRs2, useRs3, useRd = False
|
||||||
switch(s0.opcode){
|
switch(input.opcode){
|
||||||
is(p.Opcode.LOAD) { useRd := True }
|
is(p.Opcode.LOAD) { useRd := True }
|
||||||
is(p.Opcode.STORE) { useRs1 := True }
|
is(p.Opcode.STORE) { useRs1 := True }
|
||||||
is(p.Opcode.ADD) { useRd := True; useRs1 := True; useRs2 := True }
|
is(p.Opcode.ADD) { useRd := True; useRs1 := True; useRs2 := True }
|
||||||
|
@ -253,34 +258,43 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
is(p.Opcode.FCVT_X_X ) { useRd := True; useRs1 := True }
|
is(p.Opcode.FCVT_X_X ) { useRd := True; useRs1 := True }
|
||||||
}
|
}
|
||||||
|
|
||||||
val hits = List((useRs1, s0.rs1), (useRs2, s0.rs2), (useRs3, s0.rs3), (useRd, s0.rd)).map{case (use, reg) => use && rf.lock.map(l => l.valid && l.source === s0.source && l.address === reg).orR}
|
val uses = List(useRs1, useRs2, useRs3, useRd)
|
||||||
val hazard = hits.orR || commitLogic.map(_.full).read(s0.source)
|
val regs = List(input.rs1, input.rs2, input.rs3, input.rd)
|
||||||
when(s0.fire && useRd){
|
val rfHits = regs.map(scoreboard.hit.readAsync(_))
|
||||||
for(i <- 0 until portCount){
|
val rfTargets = regs.map(scoreboard.target.readAsync(_))
|
||||||
when(s0.source === i){
|
val rfBusy = (rfHits, rfTargets).zipped.map(_ ^ _)
|
||||||
commitLogic(i).target := commitLogic(i).target + 1
|
|
||||||
}
|
val hits = (0 to 3).map(id => uses(id) && rfBusy(id))
|
||||||
}
|
val hazard = hits.orR || !rf.init.done
|
||||||
for(i <- 0 until p.rfLockCount){
|
val output = input.haltWhen(hazard)
|
||||||
when(rf.lockFreeId(i)){
|
when(input.valid && rf.init.done){
|
||||||
rf.lock(i).valid := True
|
scoreboard.targetWrite.address := input.rd
|
||||||
rf.lock(i).source := s0.source
|
scoreboard.targetWrite.data := !rfTargets.last
|
||||||
rf.lock(i).address := s0.rd
|
|
||||||
rf.lock(i).id := commitLogic.map(_.target).read(s0.source)
|
|
||||||
rf.lock(i).commited := False
|
|
||||||
}
|
}
|
||||||
|
when(output.fire && useRd){
|
||||||
|
scoreboard.targetWrite.valid := True
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
val s1 = s0.haltWhen(hazard || !rf.lockFree).m2sPipe()
|
|
||||||
|
val cmdArbiter = new Area{
|
||||||
|
val arbiter = StreamArbiterFactory.noLock.roundRobin.build(FpuCmd(p), portCount)
|
||||||
|
arbiter.io.inputs <> Vec(scheduler.map(_.output))
|
||||||
|
|
||||||
|
val output = arbiter.io.output.swapPayload(RfReadInput())
|
||||||
|
output.source := arbiter.io.chosen
|
||||||
|
output.payload.assignSomeByName(arbiter.io.output.payload)
|
||||||
|
}
|
||||||
|
|
||||||
|
val read = new Area{
|
||||||
|
val s0 = cmdArbiter.output.pipelined(m2s = true, s2m = true) //TODO may need to remove m2s for store latency
|
||||||
|
val s1 = s0.m2sPipe()
|
||||||
val output = s1.swapPayload(RfReadOutput())
|
val output = s1.swapPayload(RfReadOutput())
|
||||||
val s1LockId = RegNextWhen(OHToUInt(rf.lockFreeId), !output.isStall)
|
|
||||||
val rs1Entry = rf.ram.readSync(s0.source @@ s0.rs1,enable = !output.isStall)
|
val rs1Entry = rf.ram.readSync(s0.source @@ s0.rs1,enable = !output.isStall)
|
||||||
val rs2Entry = rf.ram.readSync(s0.source @@ s0.rs2,enable = !output.isStall)
|
val rs2Entry = rf.ram.readSync(s0.source @@ s0.rs2,enable = !output.isStall)
|
||||||
val rs3Entry = rf.ram.readSync(s0.source @@ s0.rs3,enable = !output.isStall)
|
val rs3Entry = rf.ram.readSync(s0.source @@ s0.rs3,enable = !output.isStall)
|
||||||
output.source := s1.source
|
output.source := s1.source
|
||||||
output.opcode := s1.opcode
|
output.opcode := s1.opcode
|
||||||
output.lockId := s1LockId
|
|
||||||
output.arg := s1.arg
|
output.arg := s1.arg
|
||||||
output.roundMode := s1.roundMode
|
output.roundMode := s1.roundMode
|
||||||
output.rd := s1.rd
|
output.rd := s1.rd
|
||||||
|
@ -397,6 +411,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
add.payload.assignSomeByName(input.payload)
|
add.payload.assignSomeByName(input.payload)
|
||||||
add.rs2.sign.allowOverride;
|
add.rs2.sign.allowOverride;
|
||||||
add.rs2.sign := input.rs2.sign ^ input.arg(0)
|
add.rs2.sign := input.rs2.sign ^ input.arg(0)
|
||||||
|
add.needCommit := True
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -405,7 +420,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
|
|
||||||
case class S0() extends Bundle{
|
case class S0() extends Bundle{
|
||||||
val source = Source()
|
val source = Source()
|
||||||
val lockId = lockIdType()
|
|
||||||
val rd = p.rfAddress()
|
val rd = p.rfAddress()
|
||||||
val value = p.storeLoadType()
|
val value = p.storeLoadType()
|
||||||
val i2f = Bool()
|
val i2f = Bool()
|
||||||
|
@ -416,15 +430,15 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
|
|
||||||
val s0 = new Area{
|
val s0 = new Area{
|
||||||
val input = decode.load.pipelined(m2s = true, s2m = true)
|
val input = decode.load.pipelined(m2s = true, s2m = true)
|
||||||
val filtred = commitFork.load.map(port => port.takeWhen(port.sync))
|
val filtred = commitFork.load.map(port => port.takeWhen(List(FpuOpcode.LOAD, FpuOpcode.FMV_W_X, FpuOpcode.I2F).map(_ === port.opcode).orR))
|
||||||
def feed = filtred(input.source)
|
def feed = filtred(input.source)
|
||||||
val hazard = !feed.valid
|
val hazard = !feed.valid
|
||||||
|
|
||||||
|
|
||||||
val output = input.haltWhen(hazard).swapPayload(S0())
|
val output = input.haltWhen(hazard).swapPayload(S0())
|
||||||
filtred.foreach(_.ready := False)
|
filtred.foreach(_.ready := False)
|
||||||
feed.ready := input.valid && output.ready
|
feed.ready := input.valid && output.ready
|
||||||
output.source := input.source
|
output.source := input.source
|
||||||
output.lockId := input.lockId
|
|
||||||
output.rd := input.rd
|
output.rd := input.rd
|
||||||
output.value := feed.value
|
output.value := feed.value
|
||||||
output.i2f := input.i2f
|
output.i2f := input.i2f
|
||||||
|
@ -555,10 +569,8 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
when(isInfinity){recoded.setInfinity}
|
when(isInfinity){recoded.setInfinity}
|
||||||
when(isNan){recoded.setNan}
|
when(isNan){recoded.setNan}
|
||||||
|
|
||||||
val isCommited = rf.lock.map(_.commited).read(input.lockId)
|
val output = input.haltWhen(busy).swapPayload(new MergeInput())
|
||||||
val output = input.haltWhen(busy || !isCommited).swapPayload(new MergeInput())
|
|
||||||
output.source := input.source
|
output.source := input.source
|
||||||
output.lockId := input.lockId
|
|
||||||
output.roundMode := input.roundMode
|
output.roundMode := input.roundMode
|
||||||
if(p.withDouble) {
|
if(p.withDouble) {
|
||||||
output.format := input.format
|
output.format := input.format
|
||||||
|
@ -589,9 +601,10 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val shortPip = new Area{
|
val shortPip = new Area{
|
||||||
val input = decode.shortPip.stage()
|
val input = decode.shortPip.stage()
|
||||||
|
|
||||||
|
val toFpuRf = List(FpuOpcode.MIN_MAX, FpuOpcode.SGNJ, FpuOpcode.FCVT_X_X).map(input.opcode === _).orR
|
||||||
val rfOutput = Stream(new MergeInput())
|
val rfOutput = Stream(new MergeInput())
|
||||||
|
|
||||||
val isCommited = rf.lock.map(_.commited).read(input.lockId)
|
val isCommited = commitConsume(_.short, input.source, input.fire && toFpuRf)
|
||||||
val output = rfOutput.haltWhen(!isCommited)
|
val output = rfOutput.haltWhen(!isCommited)
|
||||||
|
|
||||||
val result = p.storeLoadType().assignDontCare()
|
val result = p.storeLoadType().assignDontCare()
|
||||||
|
@ -809,11 +822,9 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
is(FpuOpcode.FCLASS) { result(31 downto 0) := fclassResult.resized }
|
is(FpuOpcode.FCLASS) { result(31 downto 0) := fclassResult.resized }
|
||||||
}
|
}
|
||||||
|
|
||||||
val toFpuRf = List(FpuOpcode.MIN_MAX, FpuOpcode.SGNJ, FpuOpcode.FCVT_X_X).map(input.opcode === _).orR
|
|
||||||
|
|
||||||
rfOutput.valid := input.valid && toFpuRf && !halt
|
rfOutput.valid := input.valid && toFpuRf && !halt
|
||||||
rfOutput.source := input.source
|
rfOutput.source := input.source
|
||||||
rfOutput.lockId := input.lockId
|
|
||||||
rfOutput.rd := input.rd
|
rfOutput.rd := input.rd
|
||||||
rfOutput.roundMode := input.roundMode
|
rfOutput.roundMode := input.roundMode
|
||||||
if(p.withDouble) rfOutput.format := input.format
|
if(p.withDouble) rfOutput.format := input.format
|
||||||
|
@ -930,8 +941,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val input = mul.output.stage()
|
val input = mul.output.stage()
|
||||||
val sum = splits.take(sumSplitAt).map(e => (input.muls(e.id) << e.offsetC).resize(outWidth)).reduceBalancedTree(_ + _)
|
val sum = splits.take(sumSplitAt).map(e => (input.muls(e.id) << e.offsetC).resize(outWidth)).reduceBalancedTree(_ + _)
|
||||||
|
|
||||||
val isCommited = rf.lock.map(_.commited).read(input.lockId)
|
val output = input.swapPayload(new Sum1Output())
|
||||||
val output = input.haltWhen(!isCommited).swapPayload(new Sum1Output())
|
|
||||||
output.payload.assignSomeByName(input.payload)
|
output.payload.assignSomeByName(input.payload)
|
||||||
output.mulC2 := sum.resized
|
output.mulC2 := sum.resized
|
||||||
output.muls2 := Vec(input.muls.drop(sumSplitAt))
|
output.muls2 := Vec(input.muls.drop(sumSplitAt))
|
||||||
|
@ -941,7 +951,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val input = sum1.output.stage()
|
val input = sum1.output.stage()
|
||||||
val sum = input.mulC2 + splits.drop(sumSplitAt).map(e => (input.muls2(e.id-sumSplitAt) << e.offsetC).resize(outWidth)).reduceBalancedTree(_ + _)
|
val sum = input.mulC2 + splits.drop(sumSplitAt).map(e => (input.muls2(e.id-sumSplitAt) << e.offsetC).resize(outWidth)).reduceBalancedTree(_ + _)
|
||||||
|
|
||||||
val isCommited = rf.lock.map(_.commited).read(input.lockId)
|
val isCommited = commitConsume(_.mul, input.source, input.fire)
|
||||||
val output = input.haltWhen(!isCommited).swapPayload(new Sum2Output())
|
val output = input.haltWhen(!isCommited).swapPayload(new Sum2Output())
|
||||||
output.payload.assignSomeByName(input.payload)
|
output.payload.assignSomeByName(input.payload)
|
||||||
output.mulC := sum
|
output.mulC := sum
|
||||||
|
@ -998,7 +1008,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val output = Stream(new MergeInput())
|
val output = Stream(new MergeInput())
|
||||||
output.valid := input.valid && !input.add && !input.divSqrt
|
output.valid := input.valid && !input.add && !input.divSqrt
|
||||||
output.source := input.source
|
output.source := input.source
|
||||||
output.lockId := input.lockId
|
|
||||||
output.rd := input.rd
|
output.rd := input.rd
|
||||||
if (p.withDouble) output.format := input.format
|
if (p.withDouble) output.format := input.format
|
||||||
output.roundMode := input.roundMode
|
output.roundMode := input.roundMode
|
||||||
|
@ -1015,8 +1024,8 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
decode.mulToAdd.rs1.special := norm.output.special
|
decode.mulToAdd.rs1.special := norm.output.special
|
||||||
decode.mulToAdd.rs2 := input.rs3
|
decode.mulToAdd.rs2 := input.rs3
|
||||||
decode.mulToAdd.rd := input.rd
|
decode.mulToAdd.rd := input.rd
|
||||||
decode.mulToAdd.lockId := input.lockId
|
|
||||||
decode.mulToAdd.roundMode := input.roundMode
|
decode.mulToAdd.roundMode := input.roundMode
|
||||||
|
decode.mulToAdd.needCommit := False
|
||||||
if (p.withDouble) decode.mulToAdd.format := input.format
|
if (p.withDouble) decode.mulToAdd.format := input.format
|
||||||
|
|
||||||
when(NV){
|
when(NV){
|
||||||
|
@ -1031,7 +1040,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val div = p.withDiv generate new Area{
|
val div = p.withDiv generate new Area{
|
||||||
val input = decode.div.halfPipe()
|
val input = decode.div.halfPipe()
|
||||||
val haltIt = True
|
val haltIt = True
|
||||||
val isCommited = RegNext(rf.lock.map(_.commited).read(input.lockId))
|
val isCommited = RegNext(commitConsume(_.div, input.source, input.fire))
|
||||||
val output = input.haltWhen(haltIt || !isCommited).swapPayload(new MergeInput())
|
val output = input.haltWhen(haltIt || !isCommited).swapPayload(new MergeInput())
|
||||||
|
|
||||||
val dividerShift = if(p.withDouble) 0 else 1
|
val dividerShift = if(p.withDouble) 0 else 1
|
||||||
|
@ -1096,7 +1105,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
val sqrt = p.withSqrt generate new Area{
|
val sqrt = p.withSqrt generate new Area{
|
||||||
val input = decode.sqrt.halfPipe()
|
val input = decode.sqrt.halfPipe()
|
||||||
val haltIt = True
|
val haltIt = True
|
||||||
val isCommited = RegNext(rf.lock.map(_.commited).read(input.lockId))
|
val isCommited = RegNext(commitConsume(_.sqrt, input.source, input.fire))
|
||||||
val output = input.haltWhen(haltIt || !isCommited).swapPayload(new MergeInput())
|
val output = input.haltWhen(haltIt || !isCommited).swapPayload(new MergeInput())
|
||||||
|
|
||||||
val needShift = !input.rs1.exponent.lsb
|
val needShift = !input.rs1.exponent.lsb
|
||||||
|
@ -1170,7 +1179,7 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
|
|
||||||
val divSqrt = p.withDivSqrt generate new Area {
|
val divSqrt = p.withDivSqrt generate new Area {
|
||||||
val input = decode.divSqrt.halfPipe()
|
val input = decode.divSqrt.halfPipe()
|
||||||
|
assert(false, "Need to implement commit tracking")
|
||||||
val aproxWidth = 8
|
val aproxWidth = 8
|
||||||
val aproxDepth = 64
|
val aproxDepth = 64
|
||||||
val divIterationCount = 3
|
val divIterationCount = 3
|
||||||
|
@ -1188,7 +1197,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
decode.divSqrtToMul.rs2.assignDontCare()
|
decode.divSqrtToMul.rs2.assignDontCare()
|
||||||
decode.divSqrtToMul.rs3.assignDontCare()
|
decode.divSqrtToMul.rs3.assignDontCare()
|
||||||
decode.divSqrtToMul.rd := input.rd
|
decode.divSqrtToMul.rd := input.rd
|
||||||
decode.divSqrtToMul.lockId := input.lockId
|
|
||||||
decode.divSqrtToMul.add := False
|
decode.divSqrtToMul.add := False
|
||||||
decode.divSqrtToMul.divSqrt := True
|
decode.divSqrtToMul.divSqrt := True
|
||||||
decode.divSqrtToMul.msb1 := True
|
decode.divSqrtToMul.msb1 := True
|
||||||
|
@ -1420,8 +1428,8 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
|
|
||||||
val oh = new Area {
|
val oh = new Area {
|
||||||
val input = math.output.stage()
|
val input = math.output.stage()
|
||||||
val isCommited = rf.lock.map(_.commited).read(input.lockId)
|
val isCommited = commitConsume(_.add, input.source, input.fire && input.needCommit)
|
||||||
val output = input.haltWhen(!isCommited).swapPayload(new OhOutput)
|
val output = input.haltWhen(input.needCommit && !isCommited).swapPayload(new OhOutput)
|
||||||
output.payload.assignSomeByName(input.payload)
|
output.payload.assignSomeByName(input.payload)
|
||||||
import input.payload._
|
import input.payload._
|
||||||
|
|
||||||
|
@ -1462,7 +1470,6 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
import input.payload._
|
import input.payload._
|
||||||
|
|
||||||
output.source := input.source
|
output.source := input.source
|
||||||
output.lockId := input.lockId
|
|
||||||
output.rd := input.rd
|
output.rd := input.rd
|
||||||
output.value.sign := xySign
|
output.value.sign := xySign
|
||||||
output.value.mantissa := (mantissa >> 2).resized
|
output.value.mantissa := (mantissa >> 2).resized
|
||||||
|
@ -1620,14 +1627,14 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
|
|
||||||
|
|
||||||
nx setWhen(!input.value.special && (roundAdjusted =/= 0))
|
nx setWhen(!input.value.special && (roundAdjusted =/= 0))
|
||||||
val write = rf.lock.map(_.write).read(input.lockId)
|
val writes = rf.scoreboards.map(_.writes.readAsync(input.rd))
|
||||||
|
val write = writes.toList.read(input.source)
|
||||||
output.NX := nx & write
|
output.NX := nx & write
|
||||||
output.OF := of & write
|
output.OF := of & write
|
||||||
output.UF := uf & write
|
output.UF := uf & write
|
||||||
output.NV := input.NV & write
|
output.NV := input.NV & write
|
||||||
output.DZ := input.DZ & write
|
output.DZ := input.DZ & write
|
||||||
output.source := input.source
|
output.source := input.source
|
||||||
output.lockId := input.lockId
|
|
||||||
output.rd := input.rd
|
output.rd := input.rd
|
||||||
output.write := write
|
output.write := write
|
||||||
if(p.withDouble) output.format := input.format
|
if(p.withDouble) output.format := input.format
|
||||||
|
@ -1649,8 +1656,11 @@ case class FpuCore( portCount : Int, p : FpuParameter) extends Component{
|
||||||
}
|
}
|
||||||
|
|
||||||
when(input.valid){
|
when(input.valid){
|
||||||
for(i <- 0 until p.rfLockCount) when(input.lockId === i){
|
for(i <- 0 until portCount) {
|
||||||
rf.lock(i).valid := False
|
val port = rf.scoreboards(i).hitWrite
|
||||||
|
port.valid setWhen(input.source === i)
|
||||||
|
port.address := input.rd
|
||||||
|
port.data := !rf.scoreboards(i).hit(input.rd) //TODO improve
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1852,6 +1862,19 @@ object FpuSynthesisBench extends App{
|
||||||
//Artix 7 -> 106 Mhz 3322 LUT 3023 FF
|
//Artix 7 -> 106 Mhz 3322 LUT 3023 FF
|
||||||
//Artix 7 -> 161 Mhz 3675 LUT 3163 FF
|
//Artix 7 -> 161 Mhz 3675 LUT 3163 FF
|
||||||
|
|
||||||
|
//Fpu_32 ->
|
||||||
|
//Artix 7 -> 132 Mhz 1891 LUT 1837 FF
|
||||||
|
//Artix 7 -> 209 Mhz 2132 LUT 1847 FF
|
||||||
|
//Fpu_64 ->
|
||||||
|
//Artix 7 -> 105 Mhz 3348 LUT 3024 FF
|
||||||
|
//Artix 7 -> 162 Mhz 3712 LUT 3165 FF
|
||||||
|
|
||||||
|
//Fpu_32 ->
|
||||||
|
//Artix 7 -> 128 Mhz 1796 LUT 1727 FF
|
||||||
|
//Artix 7 -> 208 Mhz 2049 LUT 1727 FF
|
||||||
|
//Fpu_64 ->
|
||||||
|
//Artix 7 -> 109 Mhz 3417 LUT 2913 FF
|
||||||
|
//Artix 7 -> 168 Mhz 3844 LUT 3053 FF
|
||||||
|
|
||||||
/*
|
/*
|
||||||
testfloat -tininessafter -all1 > all1.txt
|
testfloat -tininessafter -all1 > all1.txt
|
||||||
|
|
|
@ -118,7 +118,6 @@ object FpuRoundModeInstr extends SpinalEnum(){
|
||||||
case class FpuParameter( withDouble : Boolean,
|
case class FpuParameter( withDouble : Boolean,
|
||||||
mulWidthA : Int = 18,
|
mulWidthA : Int = 18,
|
||||||
mulWidthB : Int = 18,
|
mulWidthB : Int = 18,
|
||||||
rfLockCount : Int = 8,
|
|
||||||
sim : Boolean = false,
|
sim : Boolean = false,
|
||||||
withAdd : Boolean = true,
|
withAdd : Boolean = true,
|
||||||
withMul : Boolean = true,
|
withMul : Boolean = true,
|
||||||
|
@ -160,8 +159,9 @@ case class FpuCmd(p : FpuParameter) extends Bundle{
|
||||||
}
|
}
|
||||||
|
|
||||||
case class FpuCommit(p : FpuParameter) extends Bundle{
|
case class FpuCommit(p : FpuParameter) extends Bundle{
|
||||||
|
val opcode = FpuOpcode()
|
||||||
|
val rd = UInt(5 bits)
|
||||||
val write = Bool()
|
val write = Bool()
|
||||||
val sync = Bool()
|
|
||||||
val value = p.storeLoadType() // IEEE 754
|
val value = p.storeLoadType() // IEEE 754
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -22,7 +22,7 @@ class FpuPlugin(externalFpu : Boolean = false,
|
||||||
object FPU_ARG extends Stageable(Bits(2 bits))
|
object FPU_ARG extends Stageable(Bits(2 bits))
|
||||||
object FPU_FORMAT extends Stageable(FpuFormat())
|
object FPU_FORMAT extends Stageable(FpuFormat())
|
||||||
|
|
||||||
var port : FpuPort = null
|
var port : FpuPort = null //Commit port is already isolated
|
||||||
|
|
||||||
override def getVexRiscvRegressionArgs(): Seq[String] = {
|
override def getVexRiscvRegressionArgs(): Seq[String] = {
|
||||||
var args = List[String]()
|
var args = List[String]()
|
||||||
|
@ -331,7 +331,8 @@ class FpuPlugin(externalFpu : Boolean = false,
|
||||||
commit.value(31 downto 0) := (input(FPU_COMMIT_LOAD) ? dBusEncoding.loadData()(31 downto 0) | input(RS1))
|
commit.value(31 downto 0) := (input(FPU_COMMIT_LOAD) ? dBusEncoding.loadData()(31 downto 0) | input(RS1))
|
||||||
if(p.withDouble) commit.value(63 downto 32) := dBusEncoding.loadData()(63 downto 32)
|
if(p.withDouble) commit.value(63 downto 32) := dBusEncoding.loadData()(63 downto 32)
|
||||||
commit.write := arbitration.isValid && !arbitration.removeIt
|
commit.write := arbitration.isValid && !arbitration.removeIt
|
||||||
commit.sync := input(FPU_COMMIT_SYNC)
|
commit.opcode := input(FPU_OPCODE)
|
||||||
|
commit.rd := input(INSTRUCTION)(rdRange).asUInt
|
||||||
|
|
||||||
when(isCommit && !commit.ready){
|
when(isCommit && !commit.ready){
|
||||||
arbitration.haltByOther := True
|
arbitration.haltByOther := True
|
||||||
|
|
|
@ -55,7 +55,7 @@ class FpuTest extends FunSuite{
|
||||||
}
|
}
|
||||||
|
|
||||||
def testP(p : FpuParameter){
|
def testP(p : FpuParameter){
|
||||||
val portCount = 1
|
val portCount = 4
|
||||||
|
|
||||||
val config = SimConfig
|
val config = SimConfig
|
||||||
config.allOptimisation
|
config.allOptimisation
|
||||||
|
@ -276,8 +276,9 @@ class FpuTest extends FunSuite{
|
||||||
}
|
}
|
||||||
commitQueue += {cmd =>
|
commitQueue += {cmd =>
|
||||||
cmd.write #= true
|
cmd.write #= true
|
||||||
|
cmd.rd #= rd
|
||||||
cmd.value #= value
|
cmd.value #= value
|
||||||
cmd.sync #= true
|
cmd.opcode #= cmd.opcode.spinalEnum.LOAD
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -326,7 +327,8 @@ class FpuTest extends FunSuite{
|
||||||
}
|
}
|
||||||
commitQueue += {cmd =>
|
commitQueue += {cmd =>
|
||||||
cmd.write #= true
|
cmd.write #= true
|
||||||
cmd.sync #= false
|
cmd.rd #= rd
|
||||||
|
cmd.opcode #= opcode
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -404,7 +406,8 @@ class FpuTest extends FunSuite{
|
||||||
}
|
}
|
||||||
commitQueue += {cmd =>
|
commitQueue += {cmd =>
|
||||||
cmd.write #= true
|
cmd.write #= true
|
||||||
cmd.sync #= true
|
cmd.rd #= rd
|
||||||
|
cmd.opcode #= FpuOpcode.I2F
|
||||||
cmd.value #= value.toLong & 0xFFFFFFFFl
|
cmd.value #= value.toLong & 0xFFFFFFFFl
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -436,7 +439,8 @@ class FpuTest extends FunSuite{
|
||||||
}
|
}
|
||||||
commitQueue += {cmd =>
|
commitQueue += {cmd =>
|
||||||
cmd.write #= true
|
cmd.write #= true
|
||||||
cmd.sync #= true
|
cmd.rd #= rd
|
||||||
|
cmd.opcode #= FpuOpcode.FMV_W_X
|
||||||
cmd.value #= value.toLong & 0xFFFFFFFFl
|
cmd.value #= value.toLong & 0xFFFFFFFFl
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -454,7 +458,8 @@ class FpuTest extends FunSuite{
|
||||||
}
|
}
|
||||||
commitQueue += {cmd =>
|
commitQueue += {cmd =>
|
||||||
cmd.write #= true
|
cmd.write #= true
|
||||||
cmd.sync #= false
|
cmd.rd #= rd
|
||||||
|
cmd.opcode #= FpuOpcode.MIN_MAX
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue