diff --git a/README.md b/README.md index 8df7cf8..b3f0ddc 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,10 @@ Verilog using the [Booth Algorithm][1]. [1]: https://en.wikipedia.org/wiki/Booth%27s_multiplication_algorithm +This design has been successfully synthesized with F4PGA +(`5aafae65883e95e41de2d0294729662dbe0a34f5`) on a Digilent Arty A7-35T +running at a clock speed of 100MHz. The test design is in `arty_test`. + ## License All source code is licensed under the CERN-OHL-W v2 or later. diff --git a/arty_test/soc.py b/arty_test/soc.py index e3406d8..64f3f96 100644 --- a/arty_test/soc.py +++ b/arty_test/soc.py @@ -1,38 +1,138 @@ -# Construct SoC. +# This file is licensed under the BSD 2 Clause License. +# (c) Peter McGoron 2022 +# Copyright (c) 2015-2019 Florent Kermarrec +# Copyright (c) 2020 Antmicro +# Copyright (c) 2022 Victor Suarez Rovere +# BSD 2-Clause License +# +# Copyright (c) Copyright 2012-2022 Enjoy-Digital. +# Copyright (c) Copyright 2012-2022 / LiteX-Hub community. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -from migen import ClockSignal -from litex_boards.targets.digilent_arty import BaseSoC -from litex_boards.platforms.digilent_arty import Platform, Builder -from litescope import LiteScopeAnalyzer +from migen import * +from litex.soc.integration.builder import Builder +from litex.soc.integration.soc_core import SoCCore +from litex.soc.cores.clock import S7PLL, S7IDELAYCTRL +from litex.soc.interconnect.csr import AutoCSR, Module, CSRStorage, CSRStatus +from litex_boards.platforms.digilent_arty import Platform +import math -platform = Platform(variant="a7-100") -platform.add_source("../boothmul.v") +from litedram.phy import s7ddrphy +from litedram.modules import MT41K128M16 +from liteeth.phy.mii import LiteEthPHYMII -class TestPlatform(BaseSoC): - def __init__(self, platform, constwid = 48, inwid = 48): - BaseSoC.__init__( - with_uartbone = True, - toolchain = "symbiflow", - platform = platform +# Clock and Reset Generator +class _CRG(Module): + def __init__(self, platform, sys_clk_freq, with_dram=True, with_rst=True): + self.rst = Signal() + self.clock_domains.cd_sys = ClockDomain() + self.clock_domains.cd_eth = ClockDomain() + if with_dram: + self.clock_domains.cd_sys4x = ClockDomain() + self.clock_domains.cd_sys4x_dqs = ClockDomain() + self.clock_domains.cd_idelay = ClockDomain() + + # Clk/Rst. + clk100 = platform.request("clk100") + rst = ~platform.request("cpu_reset") if with_rst else 0 + + # PLL. 
+ self.submodules.pll = pll = S7PLL(speedgrade=-1) + self.comb += pll.reset.eq(rst | self.rst) + pll.register_clkin(clk100, 100e6) + pll.create_clkout(self.cd_sys, sys_clk_freq) + pll.create_clkout(self.cd_eth, 25e6) + self.comb += platform.request("eth_ref_clk").eq(self.cd_eth.clk) + platform.add_false_path_constraints(self.cd_sys.clk, pll.clkin) # Ignore sys_clk to pll.clkin path created by SoC's rst. + if with_dram: + pll.create_clkout(self.cd_sys4x, 4*sys_clk_freq) + pll.create_clkout(self.cd_sys4x_dqs, 4*sys_clk_freq, phase=90) + pll.create_clkout(self.cd_idelay, 200e6) + + # IdelayCtrl. + if with_dram: + self.submodules.idelayctrl = S7IDELAYCTRL(self.cd_idelay) + +class Multiplier(Module, AutoCSR): + def __init__(self, constwid = 48, inwid = 48): + a2lensiz = math.ceil(math.log2(inwid) + 1) + 1 + self.const_in = CSRStorage(constwid, description="Multiplier Constant") + self.inval = CSRStorage(inwid, description="Multiplier 2nd Input") + self.outval = CSRStatus(constwid + inwid, description="Multiplier Output") + self.p = CSRStatus(constwid + inwid + 2, description="P") + self.a = CSRStatus(constwid + inwid + 2, description="A") + self.s = CSRStatus(constwid + inwid + 2, description="S") + self.state = CSRStatus(a2lensiz, description="State") + self.arm = CSRStorage(1, description="Arm") + self.fin = CSRStatus(1, description="Multiplier Finished") + + self.specials += Instance("boothmul", + p_A1_LEN = constwid, + p_A2_LEN = inwid, + p_A2LEN_SIZ = math.ceil(math.log2(inwid) + 1) + 1, + i_clk = ClockSignal("sys"), + i_a1 = self.const_in.storage, + i_a2 = self.inval.storage, + o_outn = self.outval.status, + i_arm = self.arm.storage, + o_fin = self.fin.status, + o_debug_a = self.a.status, + o_debug_s = self.s.status, + o_debug_p = self.p.status, + o_debug_state = self.state.status ) - self.const_in = CSRStorage(constwid) - self.inval = CSRStorage(inwid) - self.outval = CSRStatus(const_wid + inwid) - self.arm = CSRStorage(arm) - self.fin = CSRStatus(arm) - - 
self.specials += Instance("boothmul", - p_A1_LEN = constwid, - p_A2_LEN = inwid, - A2LEN_SIZ = math.ceil(math.log2(inwid) + 1) + 1, - clk = ClockSignal(), - a1 = self.const_in.storage, - a2 = self.inval.storage, - outn = self.outval.status, - i_arm = self.arm.storage, - fin = self.fin.status - ) +class TestPlatform(SoCCore, AutoCSR): + def __init__(self, platform, constwid = 8, inwid = 8): + sys_clk_freq = int(100e6) + SoCCore.__init__(self, + clk_freq=sys_clk_freq, + cpu_type = None, + integrated_sram_size = 0x2000, + with_uart = False, + platform = platform + ) + from litescope import LiteScopeAnalyzer + self.submodules.crg = _CRG(platform, sys_clk_freq, True) + self.submodules.multiplier = Multiplier(constwid, inwid) + self.add_uartbone(name="serial", baudrate=115200) + self.submodules.analyzer = LiteScopeAnalyzer( + [ + self.multiplier.p.status, + self.multiplier.a.status, + self.multiplier.s.status, + self.multiplier.inval.storage, + self.multiplier.const_in.storage, + self.multiplier.outval.status, + self.multiplier.arm.storage, + self.multiplier.fin.status, + self.multiplier.state.status, + ], depth=64, clock_domain = "sys", samplerate = sys_clk_freq, + csr_csv = "analyzer.csv" + ) +platform = Platform(variant="a7-35", toolchain="symbiflow") +platform.add_source("../boothmul.v") builder = Builder(TestPlatform(platform), csr_csv="csr.csv") builder.build() diff --git a/boothmul.v b/boothmul.v index 0962b78..0c955d6 100644 --- a/boothmul.v +++ b/boothmul.v @@ -1,3 +1,4 @@ +`define DEBUG /* Booth Multiplication v0.1 * Written by Peter McGoron, 2022. 
* @@ -29,6 +30,12 @@ module boothmul input [A1_LEN-1:0] a1, input [A2_LEN-1:0] a2, output [A1_LEN+A2_LEN-1:0] outn, +`ifdef DEBUG + output [A1_LEN+A2_LEN+1:0] debug_a, + output [A1_LEN+A2_LEN+1:0] debug_s, + output [A1_LEN+A2_LEN+1:0] debug_p, + output [A2LEN_SIZ-1:0] debug_state, +`endif output reg fin ); @@ -36,8 +43,8 @@ module boothmul * Booth Parameters **********************/ -localparam OUT_LEN = A1_LEN + A2_LEN; -localparam REG_LEN = OUT_LEN + 2; +`define OUT_LEN (A1_LEN + A2_LEN) +`define REG_LEN (`OUT_LEN + 2) /* The Booth multiplication algorithm is a sequential algorithm for * twos-compliment integers. @@ -63,11 +70,26 @@ localparam REG_LEN = OUT_LEN + 2; * [M][ A1_LEN ][ A2_LEN ][0] */ -reg signed [REG_LEN-1:0] a; -reg signed [REG_LEN-1:0] s; -reg signed [REG_LEN-1:0] p = 0; +reg [A1_LEN-1:0] a1_reg; -assign outn[OUT_LEN-1:0] = p[REG_LEN-2:1]; +wire [`REG_LEN-1:0] a; +assign a[A2_LEN:0] = 0; +assign a[`REG_LEN-2:A2_LEN+1] = a1_reg; +assign a[`REG_LEN-1] = a1_reg[A1_LEN-1]; +wire signed [`REG_LEN-1:0] a_signed; +assign a_signed = a; + +wire [`REG_LEN-1:0] s; +assign s[A2_LEN:0] = 0; +assign s[`REG_LEN-1:A2_LEN+1] = ~{a1_reg[A1_LEN-1],a1_reg} + 1; +wire signed [`REG_LEN-1:0] s_signed; +assign s_signed = s; + +reg [`REG_LEN-1:0] p; +wire signed [`REG_LEN-1:0] p_signed; +assign p_signed = p; + +assign outn = p[`REG_LEN-2:1]; /********************** * Loop Implementation @@ -75,6 +97,13 @@ assign outn[OUT_LEN-1:0] = p[REG_LEN-2:1]; reg[A2LEN_SIZ-1:0] loop_accul = 0; +`ifdef DEBUG +assign debug_a = a; +assign debug_s = s; +assign debug_p = p; +assign debug_state = loop_accul; +`endif + always @ (posedge clk) begin if (!arm) begin loop_accul <= 0; @@ -82,18 +111,14 @@ always @ (posedge clk) begin end else if (loop_accul == 0) begin p[0] <= 0; p[A2_LEN:1] <= a2; - p[REG_LEN-1:A2_LEN+1] <= 0; + p[`REG_LEN-1:A2_LEN+1] <= 0; - a[A2_LEN:0] <= 0; - a[REG_LEN-2:A2_LEN + 1] <= a1; - a[REG_LEN-1] <= a1[A1_LEN-1]; // Sign extension - - s[A2_LEN:0] <= 0; - // Extend 
before negation to ensure size - s[REG_LEN-1:A2_LEN+1] <= ~{a1[A1_LEN-1],a1} + 1; + a1_reg <= a1; loop_accul <= loop_accul + 1; + /* verilator lint_off WIDTH */ end else if (loop_accul < A2_LEN + 1) begin + /* verilator lint_on WIDTH */ /* The loop counter starts from 1, so it must go to * A2_LEN + 1 exclusive. * (i = 0; i < len; i++) @@ -101,9 +126,9 @@ always @ (posedge clk) begin */ loop_accul <= loop_accul + 1; case (p[1:0]) - 2'b00, 2'b11: p <= p >>> 1; - 2'b10: p <= (p + s) >>> 1; - 2'b01: p <= (p + a) >>> 1; + 2'b00, 2'b11: p <= p_signed >>> 1; + 2'b10: p <= (p_signed + s_signed) >>> 1; + 2'b01: p <= (p_signed + a_signed) >>> 1; endcase end else begin fin <= 1; diff --git a/sim.cpp b/sim.cpp index 0b697ee..7edf0bf 100644 --- a/sim.cpp +++ b/sim.cpp @@ -71,6 +71,7 @@ int main(int argc, char **argv) { mod->final(); delete mod; + std::cout << "done" << std::endl; return 0; }