upsilon/firmware/rtl/control_loop/boothmul.v.m4

147 lines
3.6 KiB
Plaintext

m4_changequote(`⟨', `⟩')
m4_changecom(⟨/*⟩, ⟨*/⟩)
/* m4 preamble.
* The first directive above replaces the default m4 quote characters
* (backtick and apostrophe) with the two bracket characters it names.
* Verilog compiler directives such as the ifdef blocks further down also
* begin with a backtick, which is presumably why the quotes are changed.
* The second directive makes m4 treat C-style block comments as m4
* comments, so text inside them is copied through without macro
* expansion. Line comments (two slashes) are not registered as m4
* comments here, so macro names written inside them are still expanded.
* NOTE(review): the m4_ prefix on the builtins suggests this file is meant
* to be run through m4 with prefixed builtins (the -P option) -- confirm
* against the build scripts.
*/
/* Booth Multiplication v1.0
* Written by Peter McGoron, 2022.
*
* This source describes Open Hardware and is licensed under the
* CERN-OHL-W v2.
* You may redistribute and modify this documentation and make products using
* it under the terms of the CERN-OHL-W v2 (https:/cern.ch/cern-ohl), or, at
* your option, any later version.
*
* This documentation is distributed WITHOUT ANY EXPRESS OR IMPLIED WARRANTY,
* INCLUDING OF MERCHANTABILITY, SATISFACTORY QUALITY AND FITNESS FOR
* A PARTICULAR PURPOSE. Please see the CERN-OHL-W v2 for applicable
* conditions.
*
* Source location: https://software.mcgoron.com/peter/boothmul
*/
/* boothmul: sequential multiplier using the Booth algorithm.
*
* Parameters:
*   A1_LEN     Width of a1 in bits.
*   A2_LEN     Width of a2 in bits. Also the number of loop iterations.
*   A2LEN_SIZ  Width of the loop counter (and of debug_state).
*
* Ports:
*   clk    All state is updated on the rising edge.
*   arm    While low, the loop counter and fin are cleared on each clock
*          edge. While high, the module loads the operands, runs the
*          loop and then raises fin.
*   a1,a2  Operands. They are sampled only on the load cycle (the armed
*          clock edge on which the loop counter is zero).
*   outn   Bits [REG_LEN-2:1] of the working register p. This is a
*          continuous view of p, so it changes while the loop is
*          running.
*   fin    Set once the loop counter has reached A2_LEN + 1. It is only
*          cleared by arm going low, so arm must be dropped to start
*          another multiplication.
*   debug_a, debug_s, debug_p, debug_state
*          Present only when DEBUG is defined; copies of a, s, p and the
*          loop counter.
*
* Timing while arm is held high: one clock edge to load, A2_LEN edges to
* iterate, and one more edge to raise fin.
*
* NOTE(review): fin has no initial value (the loop counter does), so in
* simulation it reads X until the first clock edge that assigns it --
* confirm that users always start with arm low.
*/
module boothmul
#(
parameter A1_LEN = 32,
parameter A2_LEN = 32,
// A2LEN_SIZ = floor(log2(A2_LEN + 2) + 1).
// It must be able to store A2_LEN + 2.
parameter A2LEN_SIZ = 6
)
(
input clk,
input arm,
input [A1_LEN-1:0] a1,
input [A2_LEN-1:0] a2,
/* M4_OUT_LEN is an m4 macro, not a Verilog parameter: it is replaced
* textually by the parenthesised sum of the two operand widths.
*/
m4_define(M4_OUT_LEN, (A1_LEN + A2_LEN))
output [M4_OUT_LEN-1:0] outn,
`ifdef DEBUG
/* The debug buses are two bits wider than outn, i.e. the same width as
* the internal a, s and p.
*/
output [M4_OUT_LEN+1:0] debug_a,
output [M4_OUT_LEN+1:0] debug_s,
output [M4_OUT_LEN+1:0] debug_p,
output [A2LEN_SIZ-1:0] debug_state,
`endif
output reg fin
);
/***********************
* Booth Parameters
**********************/
/* M4_REG_LEN is the width of a, s and p: the output width plus one bit
* at each end.
*/
m4_define(M4_REG_LEN, (M4_OUT_LEN + 2))
/* The Booth multiplication algorithm is a sequential algorithm for
* two's-complement integers.
*
* Let REG_LEN be equal to 1 + len(a1) + len(a2) + 1.
* Let P, S, and A be of length REG_LEN.
* Let A = a1 << (len(a2) + 1), where a1 sign extends to the upper bit.
* Let S = -a1 << (len(a2) + 1), where a1 sign extends to the upper bit.
* Let P = a2 << 1.
*
* Repeat the following len(a2) times:
* case(P[1:0])
* 2'b00, 2'b11: P <= P >>> 1;
* 2'b01: P <= (P + A) >>> 1;
* 2'b10: P <= (P + S) >>> 1;
* endcase
* The final value is P[REG_LEN-2:1].
*
* Wires and registers of REG_LEN length are organized like:
*
* /Overflow bit
* [M][ REG_LEN ][0]
* [M][ A1_LEN ][ A2_LEN ][0]
*/
/* Copy of a1 taken on the load cycle, so a1 may change while the loop
* is running.
*/
reg [A1_LEN-1:0] a1_reg;
/* a = +a1 in the upper field: the low A2_LEN + 1 bits are zero, a1_reg
* sits directly above them, and the top bit repeats the sign bit of
* a1_reg.
*/
wire [M4_REG_LEN-1:0] a;
assign a[A2_LEN:0] = 0;
assign a[M4_REG_LEN-2:A2_LEN+1] = a1_reg;
assign a[M4_REG_LEN-1] = a1_reg[A1_LEN-1];
/* Signed view of a, so that the addition and the >>> shift in the loop
* are evaluated as signed operations.
*/
wire signed [M4_REG_LEN-1:0] a_signed;
assign a_signed = a;
/* s = -a1 in the same alignment as a. a1_reg is sign-extended by one bit
* and then negated (invert and add one); the extra bit leaves room for
* the negation of the most negative a1.
*/
wire [M4_REG_LEN-1:0] s;
assign s[A2_LEN:0] = 0;
assign s[M4_REG_LEN-1:A2_LEN+1] = ~{a1_reg[A1_LEN-1],a1_reg} + 1;
/* Signed view of s, for the same reason as a_signed. */
wire signed [M4_REG_LEN-1:0] s_signed;
assign s_signed = s;
/* Working register. After the load cycle it holds zeros in the upper
* field, a2 in bits [A2_LEN:1] and a zero in bit 0.
*/
reg [M4_REG_LEN-1:0] p;
/* Signed view of p, so that >>> shifts in copies of the top bit. */
wire signed [M4_REG_LEN-1:0] p_signed;
assign p_signed = p;
/* The result drops the top (overflow) bit and the bottom bit of p. */
assign outn = p[M4_REG_LEN-2:1];
/**********************
* Loop Implementation
*********************/
/* Loop counter and state: 0 means "load on the next armed edge", values
* 1 to A2_LEN are loop iterations, and A2_LEN + 1 means "done".
*/
reg[A2LEN_SIZ-1:0] loop_accul = 0;
`ifdef DEBUG
assign debug_a = a;
assign debug_s = s;
assign debug_p = p;
assign debug_state = loop_accul;
`endif
always @ (posedge clk) begin
if (!arm) begin
/* Disarmed: return to the load state and drop fin. p and a1_reg keep
* their last values.
*/
loop_accul <= 0;
fin <= 0;
end else if (loop_accul == 0) begin
/* Load cycle: P = a2 << 1 with the upper field cleared, and a1 is
* latched for use by a and s.
*/
p[0] <= 0;
p[A2_LEN:1] <= a2;
p[M4_REG_LEN-1:A2_LEN+1] <= 0;
a1_reg <= a1;
loop_accul <= loop_accul + 1;
/* verilator lint_off WIDTH */
end else if (loop_accul < A2_LEN + 1) begin
/* verilator lint_on WIDTH */
/* The loop counter starts from 1, so it must go to
* A2_LEN + 1 exclusive.
* (i = 0; i < len; i++)
* becomes (i = 1; i < len + 1; i++)
*/
loop_accul <= loop_accul + 1;
/* One Booth step, selected by the two lowest bits of p: add nothing,
* +a1 or -a1 to the upper field, then shift right by one with sign
* extension.
*/
case (p[1:0])
2'b00, 2'b11: p <= p_signed >>> 1;
2'b10: p <= (p_signed + s_signed) >>> 1;
2'b01: p <= (p_signed + a_signed) >>> 1;
endcase
end else begin
/* All A2_LEN steps are done. p is no longer written, so outn holds the
* result until the next load cycle.
*/
fin <= 1;
end
end
`ifdef BOOTH_SIM
/* Simulation only: dump waveforms to booth.vcd when BOOTH_SIM is
* defined.
*/
initial begin
$dumpfile("booth.vcd");
$dumpvars;
end
`endif
endmodule