successfully synthesize design

This commit is contained in:
Peter McGoron @ planck 2022-10-29 22:20:15 -04:00
parent 175a3fbe3c
commit 89acb17aa0
4 changed files with 177 additions and 47 deletions

View File

@ -5,6 +5,10 @@ Verilog using the [Booth Algorithm][1].
[1]: https://en.wikipedia.org/wiki/Booth%27s_multiplication_algorithm
This design has been successfully synthesized with F4PGA
(`5aafae65883e95e41de2d0294729662dbe0a34f5`) on a Digilent Arty A7-35T
running at a clock speed of 100MHz. The test design is in `arty_test`.
## License
All source code is licensed under the CERN-OHL-W v2 or later.

View File

@ -1,38 +1,138 @@
# Construct SoC.
# This file is licensed under the BSD 2 Clause License.
# (c) Peter McGoron 2022
# Copyright (c) 2015-2019 Florent Kermarrec <florent@enjoy-digital.fr>
# Copyright (c) 2020 Antmicro <www.antmicro.com>
# Copyright (c) 2022 Victor Suarez Rovere <suarezvictor@gmail.com>
# BSD 2-Clause License
#
# Copyright (c) Copyright 2012-2022 Enjoy-Digital.
# Copyright (c) Copyright 2012-2022 / LiteX-Hub community.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from migen import ClockSignal
from litex_boards.targets.digilent_arty import BaseSoC
from litex_boards.platforms.digilent_arty import Platform, Builder
from litescope import LiteScopeAnalyzer
from migen import *
from litex.soc.integration.builder import Builder
from litex.soc.integration.soc_core import SoCCore
from litex.soc.cores.clock import S7PLL, S7IDELAYCTRL
from litex.soc.interconnect.csr import AutoCSR, Module, CSRStorage, CSRStatus
from litex_boards.platforms.digilent_arty import Platform
import math
platform = Platform(variant="a7-100")
platform.add_source("../boothmul.v")
from litedram.phy import s7ddrphy
from litedram.modules import MT41K128M16
from liteeth.phy.mii import LiteEthPHYMII
class TestPlatform(BaseSoC):
def __init__(self, platform, constwid = 48, inwid = 48):
BaseSoC.__init__(
with_uartbone = True,
toolchain = "symbiflow",
platform = platform
# Clock and Reset Generator
# Derives every clock domain used by the SoC from the board's "clk100" input
# through a single S7PLL:
#   sys        - system clock at `sys_clk_freq`
#   eth        - 25 MHz clock, also driven out on the "eth_ref_clk" pin
#   sys4x      - 4 * sys_clk_freq             (only when with_dram)
#   sys4x_dqs  - 4 * sys_clk_freq, 90 deg     (only when with_dram)
#   idelay     - 200 MHz, fed to S7IDELAYCTRL (only when with_dram)
#
# Parameters:
#   platform     - board platform object; must provide "clk100", "eth_ref_clk"
#                  and (when with_rst) "cpu_reset".
#   sys_clk_freq - requested frequency of the `sys` domain.
#   with_dram    - also create the sys4x / sys4x_dqs / idelay domains.
#   with_rst     - use the board's "cpu_reset" pin as a PLL reset source.
class _CRG(Module):
def __init__(self, platform, sys_clk_freq, with_dram=True, with_rst=True):
# Reset request signal; OR-ed into the PLL reset below.
self.rst = Signal()
self.clock_domains.cd_sys = ClockDomain()
self.clock_domains.cd_eth = ClockDomain()
if with_dram:
self.clock_domains.cd_sys4x = ClockDomain()
self.clock_domains.cd_sys4x_dqs = ClockDomain()
self.clock_domains.cd_idelay = ClockDomain()
# Clk/Rst.
clk100 = platform.request("clk100")
# The pin is inverted before use (presumably an active-low button - confirm
# against the board definition); a constant 0 is used when with_rst is False.
rst = ~platform.request("cpu_reset") if with_rst else 0
# PLL.
self.submodules.pll = pll = S7PLL(speedgrade=-1)
# The PLL is held in reset by either the board reset or self.rst.
self.comb += pll.reset.eq(rst | self.rst)
pll.register_clkin(clk100, 100e6)
pll.create_clkout(self.cd_sys, sys_clk_freq)
pll.create_clkout(self.cd_eth, 25e6)
# Drive the 25 MHz eth clock out on the "eth_ref_clk" pin.
self.comb += platform.request("eth_ref_clk").eq(self.cd_eth.clk)
platform.add_false_path_constraints(self.cd_sys.clk, pll.clkin) # Ignore sys_clk to pll.clkin path created by SoC's rst.
if with_dram:
pll.create_clkout(self.cd_sys4x, 4*sys_clk_freq)
pll.create_clkout(self.cd_sys4x_dqs, 4*sys_clk_freq, phase=90)
pll.create_clkout(self.cd_idelay, 200e6)
# IdelayCtrl.
if with_dram:
self.submodules.idelayctrl = S7IDELAYCTRL(self.cd_idelay)
# CSR wrapper around the Verilog `boothmul` module.
# Inputs are exposed as CSRStorage registers and outputs as CSRStatus
# registers, so the multiplier can be driven and observed over the CSR bus.
#
# Parameters:
#   constwid - bit width of the first operand (`a1`, Verilog parameter A1_LEN).
#   inwid    - bit width of the second operand (`a2`, Verilog parameter A2_LEN).
#
# Registers:
#   const_in, inval - operands (constwid and inwid bits).
#   outval          - product (constwid + inwid bits).
#   p, a, s         - debug views of the module's internal values
#                     (constwid + inwid + 2 bits each).
#   state           - debug view of the module's loop counter.
#   arm             - 1-bit control input wired to `arm`.
#   fin             - 1-bit status output wired to `fin`.
class Multiplier(Module, AutoCSR):
def __init__(self, constwid = 48, inwid = 48):
# Width of the "state" register; the same expression is passed to the
# Verilog module as A2LEN_SIZ below.
a2lensiz = math.ceil(math.log2(inwid) + 1) + 1
self.const_in = CSRStorage(constwid, description="Multiplier Constant")
self.inval = CSRStorage(inwid, description="Multiplier 2nd Input")
self.outval = CSRStatus(constwid + inwid, description="Multiplier Output")
self.p = CSRStatus(constwid + inwid + 2, description="P")
self.a = CSRStatus(constwid + inwid + 2, description="A")
self.s = CSRStatus(constwid + inwid + 2, description="S")
self.state = CSRStatus(a2lensiz, description="State")
self.arm = CSRStorage(1, description="Arm")
self.fin = CSRStatus(1, description="Multiplier Finished")
# Instantiate the Verilog module: p_* are parameters, i_* inputs, o_* outputs.
# The debug_* ports are declared under `ifdef DEBUG in the Verilog shown in
# this commit, so DEBUG must be defined there for these connections to exist.
self.specials += Instance("boothmul",
p_A1_LEN = constwid,
p_A2_LEN = inwid,
# NOTE(review): repeats the a2lensiz expression above; keep the two in sync.
p_A2LEN_SIZ = math.ceil(math.log2(inwid) + 1) + 1,
i_clk = ClockSignal("sys"),
i_a1 = self.const_in.storage,
i_a2 = self.inval.storage,
o_outn = self.outval.status,
i_arm = self.arm.storage,
o_fin = self.fin.status,
o_debug_a = self.a.status,
o_debug_s = self.s.status,
o_debug_p = self.p.status,
o_debug_state = self.state.status
)
# NOTE(review): the lines from here to the end of this block look like the
# pre-change version of the code above, left over from the diff rendering -
# confirm and drop. They reference `const_wid` and `arm`, which are not defined
# anywhere in the visible code, and pass most ports without the p_/i_/o_
# prefixes used in the instantiation above.
self.const_in = CSRStorage(constwid)
self.inval = CSRStorage(inwid)
self.outval = CSRStatus(const_wid + inwid)
self.arm = CSRStorage(arm)
self.fin = CSRStatus(arm)
self.specials += Instance("boothmul",
p_A1_LEN = constwid,
p_A2_LEN = inwid,
A2LEN_SIZ = math.ceil(math.log2(inwid) + 1) + 1,
clk = ClockSignal(),
a1 = self.const_in.storage,
a2 = self.inval.storage,
outn = self.outval.status,
i_arm = self.arm.storage,
fin = self.fin.status
)
# Test SoC: a SoCCore built with no CPU and no UART core, to which it adds
# the clock/reset generator, the Booth multiplier, a UART bridge and a
# LiteScope analyzer probing the multiplier's registers.
#
# Parameters:
#   platform - board platform object handed to SoCCore and _CRG.
#   constwid - width of the multiplier's first operand (default 8 here,
#              unlike Multiplier's own default of 48).
#   inwid    - width of the multiplier's second operand (default 8 here).
class TestPlatform(SoCCore, AutoCSR):
def __init__(self, platform, constwid = 8, inwid = 8):
sys_clk_freq = int(100e6)
SoCCore.__init__(self,
clk_freq=sys_clk_freq,
cpu_type = None,
integrated_sram_size = 0x2000,
with_uart = False,
platform = platform
)
# NOTE(review): with indentation lost in this view it is unclear whether this
# import is function-local or a leftover line from the diff - confirm.
from litescope import LiteScopeAnalyzer
# Third positional argument is with_dram=True, so the sys4x / sys4x_dqs /
# idelay domains are created.
# NOTE(review): no DRAM PHY is added in the visible code - confirm these
# domains are actually needed.
self.submodules.crg = _CRG(platform, sys_clk_freq, True)
self.submodules.multiplier = Multiplier(constwid, inwid)
# UART bridge on the "serial" pins at 115200 baud.
self.add_uartbone(name="serial", baudrate=115200)
# Analyzer capturing all multiplier registers in the sys domain; its CSR
# description is written to analyzer.csv.
self.submodules.analyzer = LiteScopeAnalyzer(
[
self.multiplier.p.status,
self.multiplier.a.status,
self.multiplier.s.status,
self.multiplier.inval.storage,
self.multiplier.const_in.storage,
self.multiplier.outval.status,
self.multiplier.arm.storage,
self.multiplier.fin.status,
self.multiplier.state.status,
], depth=64, clock_domain = "sys", samplerate = sys_clk_freq,
csr_csv = "analyzer.csv"
)
# Build entry point: target the "a7-35" Arty variant with the "symbiflow"
# toolchain, add the Verilog multiplier source to the platform, then build the
# test SoC. The SoC's CSR description is written to csr.csv.
platform = Platform(variant="a7-35", toolchain="symbiflow")
platform.add_source("../boothmul.v")
builder = Builder(TestPlatform(platform), csr_csv="csr.csv")
builder.build()

View File

@ -1,3 +1,4 @@
`define DEBUG
/* Booth Multiplication v0.1
* Written by Peter McGoron, 2022.
*
@ -29,6 +30,12 @@ module boothmul
input [A1_LEN-1:0] a1,
input [A2_LEN-1:0] a2,
output [A1_LEN+A2_LEN-1:0] outn,
`ifdef DEBUG
output [A1_LEN+A2_LEN+1:0] debug_a,
output [A1_LEN+A2_LEN+1:0] debug_s,
output [A1_LEN+A2_LEN+1:0] debug_p,
output [A2LEN_SIZ-1:0] debug_state,
`endif
output reg fin
);
@ -36,8 +43,8 @@ module boothmul
* Booth Parameters
**********************/
localparam OUT_LEN = A1_LEN + A2_LEN;
localparam REG_LEN = OUT_LEN + 2;
`define OUT_LEN (A1_LEN + A2_LEN)
`define REG_LEN (`OUT_LEN + 2)
/* The Booth multiplication algorithm is a sequential algorithm for
* two's-complement integers.
@ -63,11 +70,26 @@ localparam REG_LEN = OUT_LEN + 2;
* [M][ A1_LEN ][ A2_LEN ][0]
*/
reg signed [REG_LEN-1:0] a;
reg signed [REG_LEN-1:0] s;
reg signed [REG_LEN-1:0] p = 0;
reg [A1_LEN-1:0] a1_reg;
assign outn[OUT_LEN-1:0] = p[REG_LEN-2:1];
wire [`REG_LEN-1:0] a;
assign a[A2_LEN:0] = 0;
assign a[`REG_LEN-2:A2_LEN+1] = a1_reg;
assign a[`REG_LEN-1] = a1_reg[A1_LEN-1];
wire signed [`REG_LEN-1:0] a_signed;
assign a_signed = a;
wire [`REG_LEN-1:0] s;
assign s[A2_LEN:0] = 0;
assign s[`REG_LEN-1:A2_LEN+1] = ~{a1_reg[A1_LEN-1],a1_reg} + 1;
wire signed [`REG_LEN-1:0] s_signed;
assign s_signed = s;
reg [`REG_LEN-1:0] p;
wire signed [`REG_LEN-1:0] p_signed;
assign p_signed = p;
assign outn = p[`REG_LEN-2:1];
/**********************
* Loop Implementation
@ -75,6 +97,13 @@ assign outn[OUT_LEN-1:0] = p[REG_LEN-2:1];
reg[A2LEN_SIZ-1:0] loop_accul = 0;
`ifdef DEBUG
assign debug_a = a;
assign debug_s = s;
assign debug_p = p;
assign debug_state = loop_accul;
`endif
always @ (posedge clk) begin
if (!arm) begin
loop_accul <= 0;
@ -82,18 +111,14 @@ always @ (posedge clk) begin
end else if (loop_accul == 0) begin
p[0] <= 0;
p[A2_LEN:1] <= a2;
p[REG_LEN-1:A2_LEN+1] <= 0;
p[`REG_LEN-1:A2_LEN+1] <= 0;
a[A2_LEN:0] <= 0;
a[REG_LEN-2:A2_LEN + 1] <= a1;
a[REG_LEN-1] <= a1[A1_LEN-1]; // Sign extension
s[A2_LEN:0] <= 0;
// Extend before negation to ensure size
s[REG_LEN-1:A2_LEN+1] <= ~{a1[A1_LEN-1],a1} + 1;
a1_reg <= a1;
loop_accul <= loop_accul + 1;
/* verilator lint_off WIDTH */
end else if (loop_accul < A2_LEN + 1) begin
/* verilator lint_on WIDTH */
/* The loop counter starts from 1, so it must go to
* A2_LEN + 1 exclusive.
* (i = 0; i < len; i++)
@ -101,9 +126,9 @@ always @ (posedge clk) begin
*/
loop_accul <= loop_accul + 1;
case (p[1:0])
2'b00, 2'b11: p <= p >>> 1;
2'b10: p <= (p + s) >>> 1;
2'b01: p <= (p + a) >>> 1;
2'b00, 2'b11: p <= p_signed >>> 1;
2'b10: p <= (p_signed + s_signed) >>> 1;
2'b01: p <= (p_signed + a_signed) >>> 1;
endcase
end else begin
fin <= 1;

View File

@ -71,6 +71,7 @@ int main(int argc, char **argv) {
mod->final();
delete mod;
std::cout << "done" << std::endl;
return 0;
}