ram_shim: simulate

This commit is contained in:
Peter McGoron 2022-12-20 05:51:05 +00:00
parent 15480f11da
commit 4ba004336c
5 changed files with 381 additions and 53 deletions

View File

@ -2,12 +2,28 @@
.PHONY: test clean .PHONY: test clean
RAM_FIFO_SRC= ram_fifo.v ram_fifo_sim.cpp
test: obj_dir/Vram_fifo test: obj_dir/Vram_fifo obj_dir/Vram_shim
@./obj_dir/Vram_fifo && echo 'Vram_fifo successful'
obj_dir/Vram_fifo.mk: RAM_FIFO_SRC= ram_fifo.v ram_fifo_dual_port.v ram_fifo_sim.cpp
obj_dir/Vram_fifo.mk: ${RAM_FIFO_SRC}
verilator --cc --exe -Wall --trace --trace-fst \ verilator --cc --exe -Wall --trace --trace-fst \
${RAM_FIFO_SRC} ${RAM_FIFO_SRC}
obj_dir/Vram_fifo: obj_dir/Vram_fifo.mk obj_dir/Vram_fifo: obj_dir/Vram_fifo.mk
cd obj_dir && make -f Vram_fifo.mk cd obj_dir && make -f Vram_fifo.mk
@./obj_dir/Vram_fifo && echo 'Vram_fifo successful'
RAM_SHIM_SRC= ram_shim.v ram_fifo.v ram_fifo_dual_port.v ram_shim_sim.cpp
obj_dir/Vram_shim.mk: ${RAM_SHIM_SRC} ram_shim_cmds.vh ram_shim_cmds.h
verilator --cc --exe -Wall --trace --trace-fst \
-DRAM_SHIM_DEBUG \
${RAM_SHIM_SRC}
obj_dir/Vram_shim: obj_dir/Vram_shim.mk ram_shim_sim.cpp
cd obj_dir && make -f Vram_shim.mk
@./obj_dir/Vram_shim && echo 'Vram_shim successful'
####### Codegen ########
ram_shim_cmds.h: ram_shim_cmds.vh
echo '#pragma once' > ram_shim_cmds.h
sed 's/`define/#define/g; s/`//g' ram_shim_cmds.vh >> ram_shim_cmds.h

View File

@ -1,3 +1,7 @@
/* Implements a synchronous(!) FIFO using inferred Block RAM. This
* must wrap "ram_fifo_dual_port" due to difficulties YOSYS has with
* inferring Block RAM: refer to that module for details.
*/
module ram_fifo #( module ram_fifo #(
parameter DAT_WID = 24, parameter DAT_WID = 24,
parameter FIFO_DEPTH_WID = 11, parameter FIFO_DEPTH_WID = 11,

View File

@ -1,88 +1,184 @@
/* Ram shim. This is an interface designed for a LiteX RAM /* Ram shim. This is an interface designed for a LiteX RAM DMA module.
* DMA module. It can also be connected to a simulator. * It can also be connected to a simulator.
* *
* The read end is implemented in C since all of this is * The read end is implemented in C since all of this is backed by memory.
* backed by memory. *
* In between the system RAM and the raster scan is a block RAM FIFO so
* scanning is not interrupted by transient RAM accesses from the system.
* *
* THIS MODULE ASSUMES that RAM_WORD < DAT_WID < RAM_WORD*2. * THIS MODULE ASSUMES that RAM_WORD < DAT_WID < RAM_WORD*2.
*
* TODO: Buffer the data (using something like block ram) and
* write it out asynchronously. This will require instantiating
* the block ram primitive directly for Yosys. This should make
* writes to RAM smoother, and reads smoother when the CPU is
* reading the data.
*/ */
`include "ram_shim_cmds.vh"
module ram_shim #( module ram_shim #(
parameter BASE_ADDR = 32'h1000000,
parameter MAX_BYTE_WID = 13,
parameter DAT_WID = 24, parameter DAT_WID = 24,
parameter RAM_WORD = 16, parameter RAM_WORD = 16,
parameter RAM_WID = 32 parameter RAM_WID = 32
) ( ) (
input clk, input clk,
input signed [DAT_WID-1:0] data, input rst,
input commit,
/* Raster control interface. The kernel allocates memory and informs the
* shim what the memory location is, and how long it is (max certain length).
* This is also where the current write pointer is found so that the
* kernel can read data from the scanner into memory and out to the
* controlling computer. */
input [RAM_WID-1:0] cmd_data,
input [`RAM_SHIM_CMD_WID-1:0] cmd,
input cmd_active,
output reg cmd_finished,
output [RAM_WID-1:0] cmd_data_out,
input [DAT_WID-1:0] data,
input data_commit,
output reg finished, output reg finished,
/* Used by the kernel code to request the current `ifdef RAM_SHIM_DEBUG
* location of the FIFO head. Used to memcpy data, wire fifo_steady,
* it might better than repeatedly calling a FIFO `endif
* read.
*/
input read_end_req_off,
output reg [RAM_WID-1:0] read_end_addr,
output reg read_end_req_valid,
/* RAM DMA interface. */
output reg [RAM_WORD-1:0] word, output reg [RAM_WORD-1:0] word,
output [RAM_WID-1:0] addr, output [RAM_WID-1:0] addr,
output reg write, output reg write,
input valid input valid
); );
localparam WAIT_ON_COMMIT = 0; /* Control interface code.
localparam HIGH_WORD_LOAD = 1; * Each of these are BYTE level addresses. Most numbers in Verilog are
localparam WAIT_ON_HIGH_WORD = 2; * BITS. When converting from bits to bytes, divide by 8. */
localparam WAIT_ON_COMMIT_DEASSERT = 3;
reg [2:0] state = WAIT_ON_COMMIT;
reg [MAX_BYTE_WID-1:0] offset = 0; reg [RAM_WID-1:0] loc_start = 0;
assign addr = BASE_ADDR + {{(RAM_WID - MAX_BYTE_WID){1'b0}}, offset}; reg [RAM_WID-1:0] loc_len = 0;
initial read_end_req_valid = 0; reg [RAM_WID-1:0] loc_off = 0;
assign addr = loc_start + loc_off;
always @ (posedge clk) begin always @ (posedge clk) begin
if (read_end_req_off && !read_end_req_valid) begin if (cmd_active && !cmd_finished) case (cmd)
read_end_req_valid = 1; `RAM_SHIM_WRITE_LOC: begin
read_end_addr <= addr; loc_start <= cmd_data;
end else if (read_end_req_valid && !read_end_req_off) begin loc_off <= 0;
read_end_req_valid <= 0; cmd_finished <= 1;
end
`RAM_SHIM_WRITE_LEN: begin
loc_len <= cmd_data;
loc_off <= 0;
cmd_finished <= 1;
end
`RAM_SHIM_READ_PTR: begin
cmd_data_out <= addr;
cmd_finished <= 1;
end
endcase else begin
cmd_finished <= 0;
end end
end end
/* Block RAM FIFO controller. */
reg read_enable = 0;
reg write_enable = 0;
reg [DAT_WID-1:0] write_dat = 0;
wire [DAT_WID-1:0] read_dat;
wire empty;
wire full;
ram_fifo #(
.DAT_WID(DAT_WID)
) pre_fifo (
.clk(clk),
.rst(rst),
.read_enable(read_enable),
.write_enable(write_enable),
.write_dat(write_dat),
.read_dat(read_dat),
.empty(empty),
.full(full)
);
/* Code to take data from Block RAM and put it into System RAM. */
localparam WAIT_ON_EMPTY = 0;
localparam READ_OFF_FIFO = 1;
localparam HIGH_WORD_LOAD = 2;
localparam WAIT_ON_HIGH_WORD = 3;
reg [1:0] writestate = WAIT_ON_EMPTY;
/* Originally the simulation code checked if the intermediate FIFO was
* empty, and then stopped running the simulation. This led to an off
* by one error where the very last value pushed was not read. Instead,
* the simulator now checks for steady-ness, which means that the always
* block has idled at the WAIT_ON_EMPTY state for two cycles.
*/
`ifdef RAM_SHIM_DEBUG
reg [1:0] prev_writestate;
always @ (posedge clk) prev_writestate <= writestate;
assign fifo_steady = prev_writestate == WAIT_ON_EMPTY && writestate == WAIT_ON_EMPTY;
`endif
always @ (posedge clk) begin always @ (posedge clk) begin
case (state) case (writestate)
WAIT_ON_COMMIT: if (commit) begin WAIT_ON_EMPTY: if (!empty) begin
word <= data[RAM_WORD-1:0]; writestate <= READ_OFF_FIFO;
/* read_enable is raised at the beginning of the next clock
 * cycle. A read takes one clock cycle, so the following clock
 * cycle has to disarm read_enable, and then the cycle *after
 * that* must read the data from the FIFO.
 */
read_enable <= 1;
end
READ_OFF_FIFO: if (read_enable) begin
read_enable <= 0;
end else begin
word <= read_dat[RAM_WORD-1:0];
write <= 1; write <= 1;
state <= HIGH_WORD_LOAD; writestate <= HIGH_WORD_LOAD;
end end
HIGH_WORD_LOAD: if (valid) begin HIGH_WORD_LOAD: if (valid) begin
offset <= offset + (RAM_WORD/2); if (loc_off == loc_len - 1)
loc_off <= 0;
else
loc_off <= loc_off + RAM_WORD/8;
write <= 0; write <= 0;
word <= {{(RAM_WORD*2 - DAT_WID){data[DAT_WID-1]}}, word <= {{(RAM_WORD*2 - DAT_WID){read_dat[DAT_WID-1]}},
data[DAT_WID-1:RAM_WORD]}; read_dat[DAT_WID-1:RAM_WORD]};
state <= WAIT_ON_HIGH_WORD; writestate <= WAIT_ON_HIGH_WORD;
end end
WAIT_ON_HIGH_WORD: if (!write) begin WAIT_ON_HIGH_WORD: if (!write) begin
write <= 1; write <= 1;
end else if (valid) begin end else if (valid) begin
offset <= offset + (RAM_WORD / 2); if (loc_off == loc_len - 1)
state <= WAIT_ON_COMMIT_DEASSERT; loc_off <= 0;
finished <= 1; else
end loc_off <= loc_off + RAM_WORD/8;
WAIT_ON_COMMIT_DEASSERT: if (!commit) begin writestate <= WAIT_ON_EMPTY;
finished <= 0; write <= 0;
end end
endcase endcase
end end
/* read to memory */
always @ (posedge clk) begin
if (data_commit && !write_enable && !full) begin
write_dat <= data;
write_enable <= 1;
end else if (data_commit && write_enable) begin
write_enable <= 0;
finished <= 1;
end else if (!data_commit && finished) begin
finished <= 0;
write_enable <= 0;
end
end
/*
`ifdef VERILATOR
initial begin
$dumpfile("ram_shim.vcd");
$dumpvars;
end
`endif
*/
endmodule endmodule

View File

@ -0,0 +1,5 @@
// Command codes accepted on the "cmd" port of ram_shim.
// No case in ram_shim matches this value, so it updates no registers.
`define RAM_SHIM_NO_OP 0
// Store cmd_data as the buffer start address and reset the write offset.
`define RAM_SHIM_WRITE_LOC 1
// Store cmd_data as the buffer length and reset the write offset.
`define RAM_SHIM_WRITE_LEN 2
// Place the current write address (start + offset) on cmd_data_out.
`define RAM_SHIM_READ_PTR 3
// Width, in bits, of the "cmd" port.
`define RAM_SHIM_CMD_WID 8

View File

@ -0,0 +1,207 @@
#include <array>
#include <cassert>
#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <limits>
#include <memory>
#include <random>

#include <unistd.h>
#include <verilated.h>

#include "ram_shim_cmds.h"
#include "Vram_shim.h"
/* Model under test and the global simulation time counter. */
using ModType = Vram_shim;
ModType *mod;
uint32_t main_time = 0;

/* Time hook: reports the current value of main_time, which run_clock()
 * advances once per clock edge. */
double sc_time_stamp() {
	return static_cast<double>(main_time);
}
/* Advance the simulation by one full clock period: two clock edges,
 * each followed by a model evaluation and one tick of main_time. */
static void run_clock() {
	int edges_left = 2;
	while (edges_left-- > 0) {
		mod->clk = !mod->clk;
		mod->eval();
		++main_time;
	}
}
static void cleanup_exit() {
mod->final();
delete mod;
}
/* Set up the simulation: hand the command line to Verilator, enable
 * tracing, create the model with its clock low, and register the exit
 * handler. If the RANDOM_SEED environment variable is set, it is parsed
 * as a base-10 number and used to seed rand().
 */
static void init(int argc, char **argv) {
	Verilated::commandArgs(argc, argv);
	Verilated::traceEverOn(true);

	mod = new ModType;
	mod->clk = 0;
	atexit(cleanup_exit);

	const char *seed_text = getenv("RANDOM_SEED");
	if (seed_text == NULL)
		return;

	const unsigned long seed_value = strtoul(seed_text, NULL, 10);
	srand(static_cast<unsigned int>(seed_value));
}
/* Drive every input of the model (other than the clock) to zero. */
static void init_values() {
	/* Reset and RAM handshake. */
	mod->rst = 0;
	mod->valid = 0;

	/* Command interface. */
	mod->cmd_active = 0;
	mod->cmd = 0;
	mod->cmd_data = 0;

	/* Data input. */
	mod->data_commit = 0;
	mod->data = 0;
}
using V = uint32_t;

/* Sign-extend the low "len" bits of x to the full width of V.
 *
 * Verilator makes all ports unsigned, even when marked as signed in
 * Verilog, so values narrower than V have to be sign-extended by hand.
 *
 * x:   value whose bit (len - 1) is treated as the sign bit.
 * len: width in bits of the value held in x.
 *
 * Returns x with every bit at position len and above set to 1 when the
 * sign bit is set; otherwise returns x unchanged. When len is 0, or len
 * is at least the width of V, there is nothing to extend and x is
 * returned unchanged (shifting by those amounts would be undefined).
 */
V sign_extend(V x, unsigned len) {
	const unsigned width = sizeof(V) * 8;
	if (len == 0 || len >= width)
		return x;

	// All shifts are done on V (unsigned) so no signed overflow can occur.
	const V sign_bit = V(1) << (len - 1);
	if ((x & sign_bit) == 0)
		return x;

	// low_mask selects bits [len-1:0]; its complement selects exactly the
	// bits above the sign bit, which are the ones that must become 1.
	const V low_mask = (V(1) << len) - 1;
	return ~low_mask | x;
}

/* Keep only the low n bits of x. The shift is performed on an unsigned
 * constant so that n == 31 does not overflow a signed int; n must be
 * smaller than 32. */
#define MASK_TO(x,n) ((x) & ((1u << (n)) - 1))
/* Test memory buffering and memory interface.
 * The memory interface takes one 16-bit integer at a time. The RAM
 * interface runs slower than the insertion loop, to test buffering.
 *
 * The values given to the Verilog module are also stored in memory as
 * 32-bit integers. These are compared with the memory that simulates
 * the RAM interface.
 */
/* Geometry of the simulated RAM region. MEMORY_LEN_16BIT is
 * parenthesized so that it expands safely inside larger expressions
 * (e.g. as a divisor).
 */
#define MEMORY_LEN 1000 // How many 32 bit integers
#define MEMORY_LEN_16BIT (MEMORY_LEN*2) // How many 16 bit parts
#define MEMORY_START 0x10241024 // Address of the first 16 bit part

/* Simulated RAM contents, one entry per 16 bit write. */
static std::array<uint16_t, MEMORY_LEN_16BIT> backing_memory;
/* Marks which entries of backing_memory have been written. */
static std::array<bool, MEMORY_LEN_16BIT> backing_memory_accessed;

/* Number of handle_memory() calls a write is held before it is
 * acknowledged with "valid". */
#define MEMORY_WAIT_TIME 50
/* Simulate the RAM end of the DMA interface. Call once per clock period.
 *
 * While the model asserts "write", the word is stored on the first call
 * and "valid" is raised once MEMORY_WAIT_TIME calls have elapsed. When
 * "write" is deasserted, "valid" is dropped and the delay counter is
 * re-armed. Assertions check that each write targets an aligned, in-range,
 * not previously written location, and that "write" is never dropped in
 * the middle of a delayed access.
 */
static void handle_memory() {
	// Memory counter is used to simulate RAM delay.
	// TODO: random RAM delay
	static uint32_t memory_counter = 0;

	if (!mod->write) {
		mod->valid = 0;
		assert(memory_counter == MEMORY_WAIT_TIME || memory_counter == 0);
		memory_counter = 0;
		return;
	}

	if (memory_counter == MEMORY_WAIT_TIME) {
		mod->valid = 1;
		return;
	}

	if (memory_counter == 0) {
		assert(mod->addr >= MEMORY_START);
		const uint32_t byte_offset = mod->addr - MEMORY_START;

		// Addresses are bytes, but writes are always 16 bits.
		// Ensure we are writing to a 16 bit boundary.
		assert(byte_offset % 2 == 0);
		const uint32_t memory_access_ind = byte_offset / 2;

		// The range check must come before any array access: std::array's
		// operator[] is unchecked, so indexing first would read and write
		// out of bounds before the assertion could fire.
		assert(memory_access_ind < MEMORY_LEN_16BIT);

		// Check to make sure that the RAM interface is not overwriting
		// memory locations. For now, it should not do that.
		assert(!backing_memory_accessed[memory_access_ind]);
		backing_memory_accessed[memory_access_ind] = true;
		backing_memory[memory_access_ind] = mod->word;
	}
	memory_counter++;
}
/* Program the shim's control interface: first the buffer length
 * (RAM_SHIM_WRITE_LEN), then the buffer start address
 * (RAM_SHIM_WRITE_LOC).
 *
 * NOTE(review): the shim's own comments describe its control values as
 * byte-level quantities, while MEMORY_LEN counts 32 bit integers --
 * confirm the intended unit of the length.
 */
static void init_memory() {
	// One command handshake: present the payload and command, hold
	// cmd_active until the shim reports cmd_finished, then release
	// cmd_active and clock once more.
	const auto issue = [](uint32_t command, uint32_t payload) {
		mod->cmd_data = payload;
		mod->cmd = command;
		mod->cmd_active = 1;
		while (!mod->cmd_finished)
			run_clock();
		mod->cmd_active = 0;
		run_clock();
	};

	issue(RAM_SHIM_WRITE_LEN, MEMORY_LEN);
	issue(RAM_SHIM_WRITE_LOC, MEMORY_START);
}
/* Reference copy of every value pushed into the shim. */
static std::array<uint32_t, MEMORY_LEN> generated_memory;
/* Number of clock periods to idle between pushes. */
constexpr int CYCLE_WAIT = 10;

/* Testbench entry point.
 *
 * Pushes MEMORY_LEN random, sign-extended 24 bit values into the shim,
 * waits for the intermediate FIFO to drain, and then compares the
 * simulated RAM against the values that were sent. Exits with status 1
 * and prints the first mismatch if the contents differ; returns 0
 * otherwise.
 */
int main(int argc, char **argv) {
	init(argc, argv);
	init_values();
	init_memory();

	/* Every CYCLE_WAIT cycles, push one value to RAM.
	 * This should be smaller than the amount of time it takes for
	 * the ram to "process" the added value.
	 */
	int i = 0;
	int cntr = 0;
	while (i < MEMORY_LEN) {
		run_clock();
		handle_memory();

		if (cntr == CYCLE_WAIT) {
			if (!mod->finished && !mod->data_commit) {
				// Start a push: present the value and raise data_commit.
				generated_memory[i] = sign_extend(MASK_TO(rand(), 24), 24);
				mod->data = generated_memory[i];
				mod->data_commit = 1;
			} else if (mod->finished && mod->data_commit) {
				// Push acknowledged: release data_commit and move on.
				mod->data_commit = 0;
				i++;
				cntr = 0;
			}
		} else {
			cntr++;
		}
	}

	fprintf(stderr, "Waiting on bram\n");
	while (!mod->fifo_steady) {
		run_clock();
		handle_memory();
	}
	handle_memory();
	fprintf(stderr, "Bram complete\n");

	/* Each 32 bit value occupies two consecutive 16 bit parts, low
	 * part first. */
	for (i = 0; i < MEMORY_LEN_16BIT; i += 2) {
		const uint32_t nv = (uint32_t)backing_memory[i+1] << 16 | backing_memory[i];
		if (generated_memory[i/2] != nv) {
			// PRIx32 is the matching conversion for uint32_t.
			fprintf(stderr, "%d: %" PRIx32 " != %" PRIx32 "\n",
			        i, generated_memory[i/2], nv);
			exit(1);
		}
	}

	return 0;
}