diff --git a/firmware/rtl/raster/Makefile b/firmware/rtl/raster/Makefile
index abe471e..5a42cc6 100644
--- a/firmware/rtl/raster/Makefile
+++ b/firmware/rtl/raster/Makefile
@@ -2,12 +2,40 @@
 .PHONY: test clean
-RAM_FIFO_SRC= ram_fifo.v ram_fifo_sim.cpp
-test: obj_dir/Vram_fifo
-	@./obj_dir/Vram_fifo && echo 'Vram_fifo successful'
-obj_dir/Vram_fifo.mk:
+# Running a simulation is a phony step, separate from building its binary.
+# If the run were a recipe line of the binary's own rule, an up-to-date (or
+# previously failed) binary would make "make test" silently run nothing.
+.PHONY: test_ram_fifo test_ram_shim
+test: test_ram_fifo test_ram_shim
+
+RAM_FIFO_SRC= ram_fifo.v ram_fifo_dual_port.v ram_fifo_sim.cpp
+obj_dir/Vram_fifo.mk: ${RAM_FIFO_SRC}
 	verilator --cc --exe -Wall --trace --trace-fst \
 		${RAM_FIFO_SRC}
 obj_dir/Vram_fifo: obj_dir/Vram_fifo.mk
 	cd obj_dir && make -f Vram_fifo.mk
+test_ram_fifo: obj_dir/Vram_fifo
+	@./obj_dir/Vram_fifo && echo 'Vram_fifo successful'
+
+RAM_SHIM_SRC= ram_shim.v ram_fifo.v ram_fifo_dual_port.v ram_shim_sim.cpp
+# The generated C header is a prerequisite so that it exists before the C++
+# testbench is compiled.
+obj_dir/Vram_shim.mk: ${RAM_SHIM_SRC} ram_shim_cmds.vh ram_shim_cmds.h
+	verilator --cc --exe -Wall --trace --trace-fst \
+		-DRAM_SHIM_DEBUG \
+		${RAM_SHIM_SRC}
+obj_dir/Vram_shim: obj_dir/Vram_shim.mk ram_shim_sim.cpp
+	cd obj_dir && make -f Vram_shim.mk
+test_ram_shim: obj_dir/Vram_shim
+	@./obj_dir/Vram_shim && echo 'Vram_shim successful'
+
+####### Codegen ########
+
+# Translate the Verilog define list into C #defines. The header is written
+# to a temporary file and renamed, so that a failed sed never leaves behind
+# a truncated header that looks up to date.
+ram_shim_cmds.h: ram_shim_cmds.vh
+	{ echo '#pragma once'; \
+	  sed 's/`define/#define/g; s/`//g' ram_shim_cmds.vh; } > ram_shim_cmds.h.tmp
+	mv ram_shim_cmds.h.tmp ram_shim_cmds.h
diff --git a/firmware/rtl/raster/ram_fifo.v b/firmware/rtl/raster/ram_fifo.v
index 3834694..ce0e3d6 100644
--- a/firmware/rtl/raster/ram_fifo.v
+++ b/firmware/rtl/raster/ram_fifo.v
@@ -1,3 +1,7 @@
+/* Implements a synchronous(!) FIFO using inferred Block RAM. This
+ * must wrap "ram_fifo_dual_port" due to difficulties YOSYS has with
+ * inferring Block RAM: refer to that module for details.
+ */
 module ram_fifo #(
 	parameter DAT_WID = 24,
 	parameter FIFO_DEPTH_WID = 11,
diff --git a/firmware/rtl/raster/ram_shim.v b/firmware/rtl/raster/ram_shim.v
index 18326d6..411467e 100644
--- a/firmware/rtl/raster/ram_shim.v
+++ b/firmware/rtl/raster/ram_shim.v
@@ -1,88 +1,224 @@
-/* Ram shim. This is an interface designed for a LiteX RAM
- * DMA module. It can also be connected to a simulator.
+/* Ram shim. This is an interface designed for a LiteX RAM DMA module.
+ * It can also be connected to a simulator.
  *
- * The read end is implemented in C since all of this is
- * backed by memory.
+ * The read end is implemented in C since all of this is backed by memory.
+ *
+ * In between the system RAM and the raster scan is a block RAM FIFO so
+ * scanning is not interrupted by transient RAM accesses from the system.
  *
  * THIS MODULE ASSUMES that RAM_WORD < DAT_WID < RAM_WORD*2.
- *
- * TODO: Buffer the data (using something like block ram) and
- * write it out asynchronously. This will require instantiating
- * the block ram primitive directly for Yosys. This should make
- * writes to RAM smoother, and reads smoother when the CPU is
- * reading the data.
  */
+`include "ram_shim_cmds.vh"
 module ram_shim #(
-	parameter BASE_ADDR = 32'h1000000,
-	parameter MAX_BYTE_WID = 13,
 	parameter DAT_WID = 24,
 	parameter RAM_WORD = 16,
 	parameter RAM_WID = 32
 ) (
 	input clk,
-	input signed [DAT_WID-1:0] data,
-	input commit,
+	input rst,
+
+	/* Raster control interface. The kernel allocates memory and informs the
+	 * shim what the memory location is, and how long it is (max certain length).
+	 * This is also where the current write pointer is found so that the
+	 * kernel can read data from the scanner into memory and out to the
+	 * controlling computer. */
+	input [RAM_WID-1:0] cmd_data,
+	input [`RAM_SHIM_CMD_WID-1:0] cmd,
+	input cmd_active,
+	output reg cmd_finished,
+	/* Assigned in a clocked block, so it has to be a reg and not a net. */
+	output reg [RAM_WID-1:0] cmd_data_out,
+
+	/* Sample input. Raise data_commit with data held stable until
+	 * "finished" goes high, then lower data_commit. */
+	input [DAT_WID-1:0] data,
+	input data_commit,
 	output reg finished,
-	/* Used by the kernel code to request the current
-	 * location of the FIFO head. Used to memcpy data,
-	 * it might better than repeatedly calling a FIFO
-	 * read.
-	 */
-	input read_end_req_off,
-	output reg [RAM_WID-1:0] read_end_addr,
-	output reg read_end_req_valid,
+`ifdef RAM_SHIM_DEBUG
+	/* Testbench only: high once the DMA state machine below has sat in
+	 * WAIT_ON_EMPTY for two consecutive cycles. */
+	output wire fifo_steady,
+`endif
+
+	/* RAM DMA interface. */
 	output reg [RAM_WORD-1:0] word,
 	output [RAM_WID-1:0] addr,
 	output reg write,
 	input valid
 );
-localparam WAIT_ON_COMMIT = 0;
-localparam HIGH_WORD_LOAD = 1;
-localparam WAIT_ON_HIGH_WORD = 2;
-localparam WAIT_ON_COMMIT_DEASSERT = 3;
-reg [2:0] state = WAIT_ON_COMMIT;
+/* Control interface code.
+ * Each of these are BYTE level addresses. Most numbers in Verilog are
+ * BITS. When converting from bits to bytes, divide by 8. */
 
-reg [MAX_BYTE_WID-1:0] offset = 0;
-assign addr = BASE_ADDR + {{(RAM_WID - MAX_BYTE_WID){1'b0}}, offset};
-initial read_end_req_valid = 0;
+reg [RAM_WID-1:0] loc_start = 0;
+reg [RAM_WID-1:0] loc_len = 0;
+reg [RAM_WID-1:0] loc_off = 0;
+
+assign addr = loc_start + loc_off;
+
+/* loc_off has a single driver, the DMA state machine further down (a
+ * register assigned from two always blocks is a multiple-driver conflict
+ * for synthesis). The commands that move or resize the buffer therefore
+ * only request a rewind, on the same clock edge as the command itself. */
+wire cmd_rewind = cmd_active && !cmd_finished
+	&& (cmd == `RAM_SHIM_WRITE_LOC || cmd == `RAM_SHIM_WRITE_LEN);
+
+/* Handshake and strobe outputs start out deasserted. */
+initial begin
+	cmd_finished = 0;
+	cmd_data_out = 0;
+	finished = 0;
+	write = 0;
+end
+
 always @ (posedge clk) begin
-	if (read_end_req_off && !read_end_req_valid) begin
-		read_end_req_valid = 1;
-		read_end_addr <= addr;
-	end else if (read_end_req_valid && !read_end_req_off) begin
-		read_end_req_valid <= 0;
+	/* The command executes in the first cycle cmd_active is seen,
+	 * cmd_finished is high for the following cycle and then drops; if
+	 * cmd_active is still high after that, the command executes again. */
+	if (cmd_active && !cmd_finished) case (cmd)
+	`RAM_SHIM_WRITE_LOC: begin
+		loc_start <= cmd_data;
+		cmd_finished <= 1;
+	end
+	`RAM_SHIM_WRITE_LEN: begin
+		loc_len <= cmd_data;
+		cmd_finished <= 1;
+	end
+	`RAM_SHIM_READ_PTR: begin
+		cmd_data_out <= addr;
+		cmd_finished <= 1;
+	end
+	/* RAM_SHIM_NO_OP and unknown commands are acknowledged as well, so
+	 * a requester waiting for cmd_finished cannot hang. */
+	default: cmd_finished <= 1;
+	endcase else begin
+		cmd_finished <= 0;
 	end
 end
+/* Block RAM FIFO controller. */
+
+reg read_enable = 0;
+reg write_enable = 0;
+reg [DAT_WID-1:0] write_dat = 0;
+wire [DAT_WID-1:0] read_dat;
+wire empty;
+wire full;
+ram_fifo #(
+	.DAT_WID(DAT_WID)
+) pre_fifo (
+	.clk(clk),
+	.rst(rst),
+	.read_enable(read_enable),
+	.write_enable(write_enable),
+	.write_dat(write_dat),
+	.read_dat(read_dat),
+	.empty(empty),
+	.full(full)
+);
+
+/* Code to take data from Block RAM and put it into System RAM. */
+
+localparam WAIT_ON_EMPTY = 0;
+localparam READ_OFF_FIFO = 1;
+localparam HIGH_WORD_LOAD = 2;
+localparam WAIT_ON_HIGH_WORD = 3;
+reg [1:0] writestate = WAIT_ON_EMPTY;
+
+/* Originally the simulation code checked if the intermediate FIFO was
+ * empty, and then stopped running the simulation. This led to an off
+ * by one error where the very last value pushed was not read. Instead,
+ * the simulator now checks for steady-ness, which means that the always
+ * block has idled at the WAIT_ON_EMPTY state for two cycles.
+ */
+`ifdef RAM_SHIM_DEBUG
+reg [1:0] prev_writestate;
+always @ (posedge clk) prev_writestate <= writestate;
+assign fifo_steady = prev_writestate == WAIT_ON_EMPTY && writestate == WAIT_ON_EMPTY;
+`endif
+
 always @ (posedge clk) begin
-	case (state)
-	WAIT_ON_COMMIT: if (commit) begin
-		word <= data[RAM_WORD-1:0];
+	case (writestate)
+	WAIT_ON_EMPTY: if (!empty) begin
+		writestate <= READ_OFF_FIFO;
+		/* read_enable becomes visible to the FIFO at the beginning of
+		 * the next clock cycle. A read takes one clock cycle, so
+		 * the next clock cycle has to disarm read_enable, and
+		 * then the cycle *after that* must read the data from
+		 * the FIFO.
+		 */
+		read_enable <= 1;
+	end
+	READ_OFF_FIFO: if (read_enable) begin
+		read_enable <= 0;
+	end else begin
+		word <= read_dat[RAM_WORD-1:0];
 		write <= 1;
-		state <= HIGH_WORD_LOAD;
+		writestate <= HIGH_WORD_LOAD;
 	end
 	HIGH_WORD_LOAD: if (valid) begin
-		offset <= offset + (RAM_WORD/2);
+		/* NOTE(review): loc_off only ever advances by RAM_WORD/8, so for
+		 * an even loc_len this equality looks like it can never hold and
+		 * the pointer would not wrap -- confirm the intended condition
+		 * (e.g. loc_off + RAM_WORD/8 >= loc_len) together with the
+		 * testbench, which asserts that no location is written twice. */
+		if (loc_off == loc_len - 1)
+			loc_off <= 0;
+		else
+			loc_off <= loc_off + RAM_WORD/8;
+
 		write <= 0;
-		word <= {{(RAM_WORD*2 - DAT_WID){data[DAT_WID-1]}},
-			data[DAT_WID-1:RAM_WORD]};
-		state <= WAIT_ON_HIGH_WORD;
+		word <= {{(RAM_WORD*2 - DAT_WID){read_dat[DAT_WID-1]}},
+			read_dat[DAT_WID-1:RAM_WORD]};
+		writestate <= WAIT_ON_HIGH_WORD;
 	end
 	WAIT_ON_HIGH_WORD: if (!write) begin
 		write <= 1;
 	end else if (valid) begin
-		offset <= offset + (RAM_WORD / 2);
-		state <= WAIT_ON_COMMIT_DEASSERT;
-		finished <= 1;
-	end
-	WAIT_ON_COMMIT_DEASSERT: if (!commit) begin
-		finished <= 0;
+		/* Same wrap test as in HIGH_WORD_LOAD. */
+		if (loc_off == loc_len - 1)
+			loc_off <= 0;
+		else
+			loc_off <= loc_off + RAM_WORD/8;
+		writestate <= WAIT_ON_EMPTY;
+		write <= 0;
 	end
 	endcase
+
+	/* Placed after the case statement so that, when a rewind coincides
+	 * with a pointer advance, this later nonblocking assignment wins. */
+	if (cmd_rewind)
+		loc_off <= 0;
 end
+/* Raster -> FIFO: push one sample per data_commit/finished handshake. */
+always @ (posedge clk) begin
+	/* !finished keeps a requester that is slow to lower data_commit from
+	 * pushing the same sample a second time after the acknowledge. */
+	if (data_commit && !write_enable && !finished && !full) begin
+		write_dat <= data;
+		write_enable <= 1;
+	/* write_enable is always a single-cycle pulse, even if data_commit
+	 * was withdrawn early; presumably the FIFO would otherwise keep
+	 * storing write_dat every cycle. */
+	end else if (write_enable) begin
+		write_enable <= 0;
+		finished <= 1;
+	end else if (!data_commit && finished) begin
+		finished <= 0;
+		write_enable <= 0;
+	end
+end
+
+/*
+`ifdef VERILATOR
+initial begin
+	$dumpfile("ram_shim.vcd");
+	$dumpvars;
+end
+`endif
+*/
+
 endmodule
diff --git a/firmware/rtl/raster/ram_shim_cmds.vh b/firmware/rtl/raster/ram_shim_cmds.vh
new file mode 100644
index 0000000..8cb3917
--- /dev/null
+++ b/firmware/rtl/raster/ram_shim_cmds.vh
@@ -0,0 +1,8 @@
+/* Command codes for the ram_shim control interface. The Makefile converts
+ * this file into ram_shim_cmds.h with sed, so keep it to define lines and
+ * comments that are also valid C. */
+`define RAM_SHIM_NO_OP 0
+`define RAM_SHIM_WRITE_LOC 1
+`define RAM_SHIM_WRITE_LEN 2
+`define RAM_SHIM_READ_PTR 3
+`define RAM_SHIM_CMD_WID 8
diff --git a/firmware/rtl/raster/ram_shim_sim.cpp b/firmware/rtl/raster/ram_shim_sim.cpp
new file mode 100644
index 0000000..a8fbb8e
--- /dev/null
+++ b/firmware/rtl/raster/ram_shim_sim.cpp
@@ -0,0 
+1,217 @@
+#include <array>
+#include <cassert>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <verilated.h>
+#include "ram_shim_cmds.h"
+
+#include "Vram_shim.h"
+using ModType = Vram_shim;
+ModType *mod;
+
+uint32_t main_time = 0;
+
+// Simulation time in half clock periods; Verilator's runtime calls this.
+double sc_time_stamp() {
+	return main_time;
+}
+
+// Advance the design by one full clock period (two edges).
+static void run_clock() {
+	for (int i = 0; i < 2; i++) {
+		mod->clk = !mod->clk;
+		mod->eval();
+		main_time++;
+	}
+}
+
+static void cleanup_exit() {
+	mod->final();
+	delete mod;
+}
+
+static void init(int argc, char **argv) {
+	Verilated::commandArgs(argc, argv);
+	Verilated::traceEverOn(true);
+	mod = new ModType;
+	mod->clk = 0;
+	atexit(cleanup_exit);
+
+	// Optional reproducible stimulus: RANDOM_SEED=<n> seeds rand().
+	char *seed = getenv("RANDOM_SEED");
+	if (seed) {
+		unsigned long i = strtoul(seed, NULL, 10);
+		srand((unsigned int)i);
+	}
+}
+
+static void init_values() {
+	mod->rst = 0;
+	mod->cmd_data = 0;
+	mod->cmd = 0;
+	mod->cmd_active = 0;
+
+	mod->data = 0;
+	mod->data_commit = 0;
+	mod->valid = 0;
+}
+
+using V = uint32_t;
+
+// Verilator makes all ports unsigned, even when marked as signed in
+// Verilog. This sign-extends the low "len" bits of x to the width of V.
+V sign_extend(V x, unsigned len) {
+	// A full-width (or empty) field has nothing to extend, and the
+	// shifts below would be out of range for it.
+	if (len == 0 || len >= 32)
+		return x;
+
+	// Selects the "len" bits of the field, sign bit included. By
+	// inverting it, it selects all higher bits that must be sign
+	// extended.
+	const V mask = (V{1} << len) - 1;
+	// if high bit is 1, set all bits above the field to 1.
+	if (x >> (len - 1) & 1)
+		return ~mask | x;
+	return x;
+}
+// Unsigned constant, so the shift never reaches an int's sign bit.
+#define MASK_TO(x,n) ((x) & ((1u << (n)) - 1))
+
+/* Test memory buffering and memory interface.
+ * The memory interface takes 16 bits integers at a time. The ram interface
+ * runs slower than the insertion loop, to test buffering.
+ *
+ * The values given to the Verilog module are also stored in memory as
+ * 32 bit integers. These are compared with the memory that simulates the
+ * RAM interface.
+ */
+
+#define MEMORY_LEN 1000 // How many 32 bit integers
+#define MEMORY_LEN_16BIT (MEMORY_LEN*2) // How many 16 bit parts
+#define MEMORY_LEN_BYTES (MEMORY_LEN*4) // How many bytes
+#define MEMORY_START 0x10241024
+static std::array<uint16_t, MEMORY_LEN_16BIT> backing_memory;
+static std::array<bool, MEMORY_LEN_16BIT> backing_memory_accessed;
+#define MEMORY_WAIT_TIME 50
+
+/* Simulated RAM, called once per clock. While "write" is high, the word is
+ * stored on the first call and "valid" is raised once MEMORY_WAIT_TIME
+ * further calls have passed; "valid" drops when the shim lowers "write".
+ */
+static void handle_memory() {
+	// Memory counter is used to simulate RAM delay.
+	// TODO; random ram delay
+	static uint32_t memory_counter = 0;
+
+	if (mod->write) {
+		if (memory_counter == MEMORY_WAIT_TIME) {
+			mod->valid = 1;
+			return;
+		}
+
+		if (memory_counter == 0) {
+			uint32_t memory_access_ind = 0;
+
+			assert(mod->addr >= MEMORY_START);
+			memory_access_ind = mod->addr - MEMORY_START;
+
+			// Addresses are bytes, but writes are always 16 bits.
+			// Ensure we are writing to a 16 bit boundary.
+			assert(memory_access_ind % 2 == 0);
+			memory_access_ind /= 2;
+
+			// Range-check the index before either array is touched.
+			assert(memory_access_ind < MEMORY_LEN_16BIT);
+
+			// Check to make sure that the RAM interface is not overwriting
+			// memory locations. For now, it should not do that.
+			assert(!backing_memory_accessed[memory_access_ind]);
+			backing_memory_accessed[memory_access_ind] = true;
+
+			backing_memory[memory_access_ind] = mod->word;
+			// printf("RAM end: %x @ %d\n", backing_memory[memory_access_ind], memory_access_ind);
+		}
+		memory_counter++;
+	} else {
+		mod->valid = 0;
+		assert(memory_counter == MEMORY_WAIT_TIME || memory_counter == 0);
+		memory_counter = 0;
+	}
+}
+
+// Issue one control command and wait until the shim acknowledges it.
+static void run_command(uint32_t cmd, uint32_t data) {
+	mod->cmd_data = data;
+	mod->cmd = cmd;
+
+	mod->cmd_active = 1;
+	while (!mod->cmd_finished)
+		run_clock();
+	mod->cmd_active = 0;
+	run_clock();
+}
+
+static void init_memory() {
+	// The shim's location registers count bytes, so the length is the
+	// buffer size in bytes, not the number of 32 bit samples.
+	run_command(RAM_SHIM_WRITE_LEN, MEMORY_LEN_BYTES);
+	run_command(RAM_SHIM_WRITE_LOC, MEMORY_START);
+}
+
+static std::array<uint32_t, MEMORY_LEN> generated_memory;
+constexpr int CYCLE_WAIT = 10;
+
+int main(int argc, char **argv) {
+	init(argc, argv);
+	init_values();
+	init_memory();
+
+	/* Every CYCLE_WAIT cycles, push one value to RAM.
+	 * This should be smaller than the amount of time it takes for
+	 * the ram to "process" the added value.
+	 */
+	int i = 0;
+	int cntr = 0;
+	while (i < MEMORY_LEN) {
+		run_clock();
+		handle_memory();
+
+		if (cntr == CYCLE_WAIT) {
+			if (!mod->finished && !mod->data_commit) {
+				generated_memory[i] = sign_extend(MASK_TO(rand(), 24), 24);
+				// printf("Sending: %d, %x\n", i, generated_memory[i]);
+				// The port is 24 bits wide: drive it with the field
+				// only and keep the sign-extended copy for the
+				// final comparison.
+				mod->data = MASK_TO(generated_memory[i], 24);
+				mod->data_commit = 1;
+			} else if (mod->finished && mod->data_commit) {
+				mod->data_commit = 0;
+				i++;
+				cntr = 0;
+			}
+		} else {
+			cntr++;
+		}
+	}
+
+	fprintf(stderr, "Waiting on bram\n");
+	while (!mod->fifo_steady) {
+		run_clock();
+		handle_memory();
+	}
+	handle_memory();
+	fprintf(stderr, "Bram complete\n");
+
+	for (i = 0; i < MEMORY_LEN_16BIT; i+=2) {
+		uint32_t nv = (uint32_t)backing_memory[i+1] << 16 | backing_memory[i];
+		if (generated_memory[i/2] != nv) {
+			fprintf(stderr, "%d: %x != %x\n", i, generated_memory[i/2], nv);
+			exit(1);
+		}
+	}
+
+	return 0;
+}