From ba901a80d79e1533e2f2bc59620b8c6aa482288a Mon Sep 17 00:00:00 2001
From: Peter McGoron <code@mcgoron.com>
Date: Fri, 28 Oct 2022 17:31:23 -0400
Subject: [PATCH] separate math into other file

---
 firmware/rtl/control_loop/control_loop_math.v | 281 ++++++++++++++++++
 firmware/rtl/control_loop/mul_const.v         |  56 ++++
 firmware/rtl/control_loop/mul_const_sim.cpp   |  75 +++++
 3 files changed, 412 insertions(+)
 create mode 100644 firmware/rtl/control_loop/control_loop_math.v
 create mode 100644 firmware/rtl/control_loop/mul_const.v
 create mode 100644 firmware/rtl/control_loop/mul_const_sim.cpp

diff --git a/firmware/rtl/control_loop/control_loop_math.v b/firmware/rtl/control_loop/control_loop_math.v
new file mode 100644
index 0000000..04d9df5
--- /dev/null
+++ b/firmware/rtl/control_loop/control_loop_math.v
@@ -0,0 +1,281 @@
+/*************** Precision **************
+ * The control loop is designed around these values, but generally
+ * does not hardcode them.
+ *
+ * Since α and P are precalculated outside of the loop, their
+ * conversion to numbers the loop understands is done outside of
+ * the loop and in the kernel.
+ *
+ * The 18-bit ADC is twos-compliment, -10.24V to 10.24V,
+ * with 78μV per increment.
+ * The 20-bit DAC is twos-compliment, -10V to 10V.
+ *
+ * The `P` constant has a minimum value of 1e-7 with a precision
+ * of 1e-9, and a maxmimum value of 1.
+ *
+ * The `I` constant has a minimum value of 1e-4 with a precision
+ * of 1e-6 and a maximum value of 100.
+ *
+ * Δt is cycles/100MHz. This makes Δt at least 10 ns, with a
+ * maximum of 1 ms.
+ *
+ * [1 : sign][7: whole][40: fractional]
+ * This is 127 to -128, with a resolution of 9.095e-13.
+ */
+
+/* If this design needs to be faster, you can:
+ 1) Pipeline the design
+ 2) Use DSPs
+
+ With symbiflow + yosys there is no way to explicitly instantiate
+ a DSP40 module. YOSYS may infer it but that might be unreliable.
+ */
+
+module control_loop_math #(
+	parameter CONSTS_WHOLE = 8,
+	parameter CONSTS_FRAC = 40,
+	parameter CONSTS_WID = CONSTS_WHOLE + CONSTS_FRAC,
+	/* This number is the conversion from ADC voltage units to
+	 * a fixed-point number.
+	 * A micro-optimization could roll the ADC reading and the multiplier
+	 * together.
+	 * The LSB of this number is 2**(-CONSTS_FRAC).
+	 */
+	parameter INT_TO_REAL_WID = 27,
+	parameter [INT_TO_REAL_WID-1:0] INT_TO_REAL = 'b101000111001001111101110010,
+	/* This number is 1/(clock cycle).
+	   The number is interpreted so the least significant bit
+	   coincides with the LSB of a constant. */
+	parameter SEC_PER_CYCLE_WID = 14,
+	parameter [SEC_PER_CYCLE_WID-1:0] SEC_PER_CYCLE = 'b10101011110011,
+	parameter DELAY_WID = 16,
+	parameter DAC_DATA_WID = 20,
+	parameter ADC_WID = 18,
+	parameter CYCLE_COUNT_WID = 18
+) (
+	input clk,
+	input arm,
+	output finished,
+
+	input [ADC_WID-1:0] setpt,
+	input [ADC_WID-1:0] measured,
+	input [CONSTS_WID-1:0] cl_P,
+	input [CONSTS_WID-1:0] cl_I,
+	input [CONSTS_WID-1:0] e_prev,
+	input [CYCLE_COUNT_WID-1:0] cycles,
+	input [DELAY_WID-1:0] dely,
+
+	output reg [CONSTS_WID-1:0] e_cur,
+	output reg [DAC_DATA_WID-1:0] adjval
+);
+
+/**** Stage 1: Convert error to real value, calculate Δt = cycles/100MHz
+ *
+ *    e_unscaled: ERR_WID.0
+ * x INT_TO_REAL: 0.INT_TO_REAL_WID
+ *- -----------------------------
+ *   e_scaled_unsat: ERR_WID + INT_TO_REAL_WID
+ */
+
+localparam ERR_WID = ADC_WID + 1;
+wire [ERR_WID-1:0] e_unscaled = setpt - measured;
+
+reg arm_stage_1 = 0;
+wire mul_scale_err_fin;
+
+localparam E_UNTRUNC_WID = ERR_WID + INT_TO_REAL_WID;
+wire [E_UNTRUNC_WID-1:0] e_scaled_unsat;
+boothmul #(
+	.A1_LEN(INT_TO_REAL_WID),
+	.A2_LEN(ERR_WID)
+) mul_scale_err (
+	.clk(clk),
+	.arm(arm_stage_1),
+	.a1(INT_TO_REAL),
+	.a2(e_unscaled),
+	.outn(e_scaled_unsat),
+	.fin(mul_scale_err_fin)
+);
+
+localparam E_WID = E_UNTRUNC_WID > CONSTS_WID ? CONSTS_WID : E_UNTRUNC_WID;
+wire [E_WID-1:0] e;
+
+localparam E_FRAC = E_WID < CONSTS_FRAC ? E_WID : E_WID - CONSTS_FRAC;
+localparam E_WHOLE = E_WID - E_FRAC;
+
+/* Don't bother keeping numbers larger than the constant width
+ * since the value will always fit in it. */
+
+generate if (E_UNTRUNC_WID > CONSTS_WID) begin
+	intsat #(
+		.IN_LEN(E_UNTRUNC_WID),
+		.LTRUNC(E_UNTRUNC_WID - CONSTS_WHOLE)
+	) sat_mul_scale_err (
+		.inp(e_scaled_unsat),
+		.outp(e)
+	);
+end else begin
+	assign e = e_scaled_unsat;
+end endgenerate
+
+/*    cycles: CYCLE_COUNT_WID.0
+ *    SEC_PER_CYCLE: 0....SEC_PER_CYCLE_WID
+ * -x--------------------------------
+ *    dt_unsat: CYCLE_COUNT_WID + SEC_PER_CYCLE_WID
+ *
+ * Optimization note: the total width can be capped to below 1.
+ */
+
+localparam DT_UNSAT_WID = CYCLE_COUNT_WID + SEC_PER_CYCLE_WID;
+wire [DT_UNSAT_WID-1:0] dt_unsat;
+wire mul_dt_fin;
+boothmul #(
+	.A1_LEN(CYCLE_COUNT_WID),
+	.A2_LEN(SEC_PER_CYCLE_WID)
+) mul_dt (
+	.clk(clk),
+	.arm(arm_stage_1),
+	.a1(cycles),
+	.a2(SEC_PER_CYCLE),
+	.outn(dt_unsat),
+	.fin(mul_dt_fin)
+);
+
+localparam DT_WID = DT_UNSAT_WID > CONSTS_WID ? CONSTS_WID : DT_UNSAT_WID;
+wire [DT_WID-1:0] dt;
+
+localparam DT_WHOLE = DT_WID < CONSTS_FRAC ? 0 : CONSTS_FRAC - DT_WID;
+localparam DT_FRAC = DT_WID - DT_WHOLE;
+
+generate if (DT_UNSAT_WID > CONSTS_WID) begin
+	intsat #(
+		.IN_LEN(DT_UNSAT_WID),
+		.LTRUNC(DT_UNSAT_WID - CONSTS_WID)
+	) insat_dt (
+		.inp(dt_unsat),
+		.outp(dt)
+	);
+end else begin
+	assign dt = dt_unsat;
+end endgenerate
+
+/**** Stage 2: Calculate P + IΔt
+ *     I: CONSTS_WHOLE.CONSTS_FRAC
+ *  x  dt: DT_WHOLE.DT_FRAC
+ *-- -------------------------------
+ *     Idt_unscaled:
+ *-- --------------------------------
+ *     Idt: CONSTS_WHOLE.CONSTS_FRAC
+ *
+ * Right-truncate DT_FRAC bits to ensure CONSTS_FRAC
+ * Integer-sature the DT_WHOLE bits if it extends far enough
+ */
+
+wire stage2_finished;
+reg arm_stage2 = 0;
+wire [CONSTS_WID-1:0] idt;
+
+mul_const #(
+/* TODO: does this autoinfer CONSTS_WID? */
+	.CONSTS_WHOLE(CONSTS_WHOLE),
+	.CONSTS_FRAC(CONSTS_FRAC),
+	.IN_WHOLE(DT_WHOLE),
+	.IN_FRAC(DT_FRAC)
+) mul_const_idt (
+	.clk(clk),
+	.inp(dt),
+	.const_in(cl_I),
+	.arm(arm_stage2),
+	.outp(idt),
+	.finished(stage2_finished)
+);
+
+wire [CONSTS_WID:0] pidt_untrunc = cl_P + idt;
+/* Assuming that the constraints on cl_P, I, and dt hold */
+wire [CONSTS_WID-1:0] pidt = pidt_untrunc[CONSTS_WID-1:0];
+
+/**** Stage 3: calculate e_t(P + IΔt) and P e_{t-1} ****/
+
+reg arm_stage3 = 0;
+
+wire epidt_finished;
+wire pe_finished;
+
+wire [CONSTS_WID-1:0] epidt;
+mul_const #(
+	.CONSTS_WHOLE(CONSTS_WHOLE),
+	.CONSTS_FRAC(CONSTS_FRAC),
+	.IN_WHOLE(E_WHOLE),
+	.IN_FRAC(E_FRAC)
+) mul_const_epidt (
+	.clk(clk),
+	.inp(e),
+	.const_in(idt),
+	.arm(arm_stage3),
+	.outp(epidt),
+	.finished(epidt_finished)
+);
+
+wire [CONSTS_WID-1:0] pe;
+mul_const #(
+	.COSNTS_WHOLE(CONSTS_WHOLE),
+	.CONSTS_FRAC(CONSTS_FRAC),
+	.IN_WHOLE(E_WHOLE),
+	.IN_FRAC(E_FRAC)
+) mul_const_pe (
+	.clk(clk),
+	.inp(e),
+	.const_in(idt),
+	.arm(arm_stage3),
+	.outp(pe),
+	.finished(epidt_finished)
+);
+
+/******* State machine ********/
+localparam WAIT_ON_ARM = 0;
+localparam WAIT_ON_STAGE_1 = 1;
+localparam WAIT_ON_STAGE_2 = 2;
+localparam WAIT_ON_STAGE_3 = 3;
+localparam WAIT_ON_DISARM = 4;
+
+localparam STATE_SIZ = 3;
+reg [STATE_SIZ-1:0] state = WAIT_ON_ARM;
+
+always @ (posedge clk) begin
+	case (state) begin
+	WAIT_ON_ARM: begin
+		if (arm) begin
+			arm_stage_1 <= 1;
+			state <= WAIT_ON_STAGE_1;
+		end
+	end
+	WAIT_ON_STAGE_1: begin
+		if (mul_scale_err_fin && mul_dt_fin) begin
+			arm_stage_1 <= 0;
+			arm_stage_2 <= 1;
+			state <= WAIT_ON_STAGE_2;
+		end
+	end
+	WAIT_ON_STAGE_2: begin
+		if (stage2_finished) begin
+			arm_stage_2 <= 0;
+			arm_stage_3 <= 1;
+			state <= WAIT_ON_STAGE_3;
+		end
+	end
+	WAIT_ON_STAGE_3: begin
+		if (epidt_finished && pe_finished) begin
+			arm_stage3 <= 0;
+			finished <= 1;
+			state <= WAIT_ON_DISARM;
+		end
+	end
+	WAIT_ON_DISARM: begin
+		if (!arm) begin
+			finished <= 0;
+			state <= WAIT_ON_ARM;
+		end
+	end
+end
+
+endmodule
diff --git a/firmware/rtl/control_loop/mul_const.v b/firmware/rtl/control_loop/mul_const.v
new file mode 100644
index 0000000..cad1c89
--- /dev/null
+++ b/firmware/rtl/control_loop/mul_const.v
@@ -0,0 +1,56 @@
+module mul_const #(
+	parameter CONSTS_WHOLE = 8,
+	parameter CONSTS_FRAC = 40,
+	parameter CONSTS_WID = CONSTS_WHOLE + CONSTS_FRAC,
+	parameter IN_WHOLE = CONSTS_WHOLE,
+	parameter IN_FRAC = CONSTS_FRAC,
+	parameter IN_WID = IN_WHOLE + IN_FRAC
+) (
+	input clk,
+	input signed [IN_WID-1:0] inp,
+	input signed [CONSTS_WID-1:0] const_in,
+	input arm,
+
+	output signed [CONSTS_WID-1:0] outp,
+	output finished
+);
+
+localparam UNSAT_WID = CONSTS_WID + IN_WID;
+wire signed [UNSAT_WID-1:0] unsat;
+
+boothmul #(
+	.A1_LEN(CONSTS_WID),
+	.A2_LEN(IN_WID)
+) mul (
+	.clk(clk),
+	.arm(arm),
+	.a1(const_in),
+	.a2(inp),
+	.outn(unsat),
+	.fin(finished)
+);
+
+localparam RIGHTTRUNC_WID = UNSAT_WID - IN_FRAC;
+wire signed [RIGHTTRUNC_WID-1:0] rtrunc =
+	unsat[UNSAT_WID-1:IN_FRAC];
+
+generate if (IN_WHOLE > 0) begin
+	intsat #(
+		.IN_LEN(RIGHTTRUNC_WID),
+		.LTRUNC(IN_WHOLE)
+	) sat (
+		.inp(rtrunc),
+		.outp(outp)
+	);
+end else begin
+	assign outp = rtrunc;
+end endgenerate
+
+`ifdef VERILATOR
+initial begin
+	$dumpfile("mul_const.fst");
+	$dumpvars();
+end
+`endif
+
+endmodule
diff --git a/firmware/rtl/control_loop/mul_const_sim.cpp b/firmware/rtl/control_loop/mul_const_sim.cpp
new file mode 100644
index 0000000..f450e20
--- /dev/null
+++ b/firmware/rtl/control_loop/mul_const_sim.cpp
@@ -0,0 +1,75 @@
+#include <cstdio>
+#include <cstdint>
+#include "Vmul_const.h"
+using ModType = Vmul_const;
+
+uint32_t main_time = 0;
+double sc_time_stamp() {
+	return main_time;
+}
+
+ModType *mod;
+
+static void run_clock() {
+	for (i = 0; i < 2; i++) {
+		mod->clk = !mod->clk;
+		mod->eval();
+		main_time++;
+	}
+}
+
+static void init(int argc, char **argv) {
+	Verilator::commandArgs(argc, argv);
+	Verilated::traceEverOn(true);
+	mod = new ModType;
+	mod->clk = 0;
+}
+
+#define BITMASK(n) ((1 << (n)) - 1)
+
+static void satmul(int64_t const_in, int64_t inp) {
+	int64_t r = const_in * inp;
+	if (r >= BITMASK(48))
+		return BITMASK(48);
+	else if (r <= -BITMASK(48))
+		return -BITMASK(48);
+	else
+		return r; 
+}
+
+#define RUNS 10000
+static void run(uint64_t const_in, uint64_t inp) {
+	const_in &= BITMASK(48);
+	inp &= BITMASK(IN_WID);
+
+	mod->inp = inp;
+	mod->const_in = const_in;
+	mod->arm = 1;
+
+	while (!mod->finished)
+		run_clock();
+	mod->finished = 0;
+	run_clock();
+
+
+	int64_t real_result = satmul(const_in, inp);
+
+	if (real_result != outp) {
+		printf("%llX * %llX = %llX (got %llX)\n",
+			std::reinterpret_cast<uint64_t>(const_in),
+			std::reinterpret_cast<uint64_t>(inp),
+			std::reinterpret_cast<uint64_t>(real_result),
+			std::reinterpret-cast<uint64_t>(outp));
+		exit(1);
+	}
+}
+
+int main(int argc, char **argv) {
+	run_clock();
+
+	for (int i = 0; i < RUNS; i++) {
+		run(rand() - rand(), rand() - rand());
+	}
+
+	return 0;
+}