ram_shim: simulate
This commit is contained in:
parent
15480f11da
commit
4ba004336c
|
@ -2,12 +2,28 @@
|
|||
|
||||
.PHONY: test clean
|
||||
|
||||
RAM_FIFO_SRC= ram_fifo.v ram_fifo_sim.cpp
|
||||
|
||||
test: obj_dir/Vram_fifo
|
||||
@./obj_dir/Vram_fifo && echo 'Vram_fifo successful'
|
||||
obj_dir/Vram_fifo.mk:
|
||||
test: obj_dir/Vram_fifo obj_dir/Vram_shim
|
||||
|
||||
RAM_FIFO_SRC= ram_fifo.v ram_fifo_dual_port.v ram_fifo_sim.cpp
|
||||
obj_dir/Vram_fifo.mk: ${RAM_FIFO_SRC}
|
||||
verilator --cc --exe -Wall --trace --trace-fst \
|
||||
${RAM_FIFO_SRC}
|
||||
obj_dir/Vram_fifo: obj_dir/Vram_fifo.mk
|
||||
cd obj_dir && make -f Vram_fifo.mk
|
||||
@./obj_dir/Vram_fifo && echo 'Vram_fifo successful'
|
||||
|
||||
RAM_SHIM_SRC= ram_shim.v ram_fifo.v ram_fifo_dual_port.v ram_shim_sim.cpp
|
||||
obj_dir/Vram_shim.mk: ${RAM_SHIM_SRC} ram_shim_cmds.vh ram_shim_cmds.h
|
||||
verilator --cc --exe -Wall --trace --trace-fst \
|
||||
-DRAM_SHIM_DEBUG \
|
||||
${RAM_SHIM_SRC}
|
||||
obj_dir/Vram_shim: obj_dir/Vram_shim.mk ram_shim_sim.cpp
|
||||
cd obj_dir && make -f Vram_shim.mk
|
||||
@./obj_dir/Vram_shim && echo 'Vram_shim successful'
|
||||
|
||||
####### Codegen ########
|
||||
|
||||
ram_shim_cmds.h: ram_shim_cmds.vh
|
||||
echo '#pragma once' > ram_shim_cmds.h
|
||||
sed 's/`define/#define/g; s/`//g' ram_shim_cmds.vh >> ram_shim_cmds.h
|
||||
|
|
|
@ -1,3 +1,7 @@
|
|||
/* Implements a synchronous(!) FIFO using inferred Block RAM. This
|
||||
* must wrap "ram_fifo_dual_port" due to difficulties YOSYS has with
|
||||
* inferring Block RAM: refer to that module for details.
|
||||
*/
|
||||
module ram_fifo #(
|
||||
parameter DAT_WID = 24,
|
||||
parameter FIFO_DEPTH_WID = 11,
|
||||
|
|
|
@ -1,88 +1,184 @@
|
|||
/* Ram shim. This is an interface designed for a LiteX RAM
|
||||
* DMA module. It can also be connected to a simulator.
|
||||
/* Ram shim. This is an interface designed for a LiteX RAM DMA module.
|
||||
* It can also be connected to a simulator.
|
||||
*
|
||||
* The read end is implemented in C since all of this is
|
||||
* backed by memory.
|
||||
* The read end is implemented in C since all of this is backed by memory.
|
||||
*
|
||||
* In between the system RAM and the raster scan is a block RAM FIFO so
|
||||
* scanning is not interrupted by transient RAM accesses from the system.
|
||||
*
|
||||
* THIS MODULE ASSUMES that RAM_WORD < DAT_WID < RAM_WORD*2.
|
||||
*
|
||||
* TODO: Buffer the data (using something like block ram) and
|
||||
* write it out asynchronously. This will require instantiating
|
||||
* the block ram primitive directly for Yosys. This should make
|
||||
* writes to RAM smoother, and reads smoother when the CPU is
|
||||
* reading the data.
|
||||
*/
|
||||
`include "ram_shim_cmds.vh"
|
||||
module ram_shim #(
|
||||
parameter BASE_ADDR = 32'h1000000,
|
||||
parameter MAX_BYTE_WID = 13,
|
||||
parameter DAT_WID = 24,
|
||||
parameter RAM_WORD = 16,
|
||||
parameter RAM_WID = 32
|
||||
) (
|
||||
input clk,
|
||||
input signed [DAT_WID-1:0] data,
|
||||
input commit,
|
||||
input rst,
|
||||
|
||||
/* Raster control interface. The kernel allocates memory and informs the
|
||||
* shim what the memory location is, and how long it is (max certain length).
|
||||
* This is also where the current write pointer is found so that the
|
||||
* kernel can read data from the scanner into memory and out to the
|
||||
* controlling computer. */
|
||||
input [RAM_WID-1:0] cmd_data,
|
||||
input [`RAM_SHIM_CMD_WID-1:0] cmd,
|
||||
input cmd_active,
|
||||
output reg cmd_finished,
|
||||
output [RAM_WID-1:0] cmd_data_out,
|
||||
|
||||
input [DAT_WID-1:0] data,
|
||||
input data_commit,
|
||||
output reg finished,
|
||||
|
||||
/* Used by the kernel code to request the current
|
||||
* location of the FIFO head. Used to memcpy data,
|
||||
* it might be better than repeatedly calling a FIFO
|
||||
* read.
|
||||
*/
|
||||
input read_end_req_off,
|
||||
output reg [RAM_WID-1:0] read_end_addr,
|
||||
output reg read_end_req_valid,
|
||||
`ifdef RAM_SHIM_DEBUG
|
||||
wire fifo_steady,
|
||||
`endif
|
||||
|
||||
/* RAM DMA interface. */
|
||||
output reg [RAM_WORD-1:0] word,
|
||||
output [RAM_WID-1:0] addr,
|
||||
output reg write,
|
||||
input valid
|
||||
);
|
||||
|
||||
localparam WAIT_ON_COMMIT = 0;
|
||||
localparam HIGH_WORD_LOAD = 1;
|
||||
localparam WAIT_ON_HIGH_WORD = 2;
|
||||
localparam WAIT_ON_COMMIT_DEASSERT = 3;
|
||||
reg [2:0] state = WAIT_ON_COMMIT;
|
||||
/* Control interface code.
|
||||
* Each of these are BYTE level addresses. Most numbers in Verilog are
|
||||
* BITS. When converting from bits to bytes, divide by 8. */
|
||||
|
||||
reg [MAX_BYTE_WID-1:0] offset = 0;
|
||||
assign addr = BASE_ADDR + {{(RAM_WID - MAX_BYTE_WID){1'b0}}, offset};
|
||||
initial read_end_req_valid = 0;
|
||||
reg [RAM_WID-1:0] loc_start = 0;
|
||||
reg [RAM_WID-1:0] loc_len = 0;
|
||||
reg [RAM_WID-1:0] loc_off = 0;
|
||||
|
||||
assign addr = loc_start + loc_off;
|
||||
|
||||
always @ (posedge clk) begin
|
||||
if (read_end_req_off && !read_end_req_valid) begin
|
||||
read_end_req_valid = 1;
|
||||
read_end_addr <= addr;
|
||||
end else if (read_end_req_valid && !read_end_req_off) begin
|
||||
read_end_req_valid <= 0;
|
||||
if (cmd_active && !cmd_finished) case (cmd)
|
||||
`RAM_SHIM_WRITE_LOC: begin
|
||||
loc_start <= cmd_data;
|
||||
loc_off <= 0;
|
||||
cmd_finished <= 1;
|
||||
end
|
||||
`RAM_SHIM_WRITE_LEN: begin
|
||||
loc_len <= cmd_data;
|
||||
loc_off <= 0;
|
||||
cmd_finished <= 1;
|
||||
end
|
||||
`RAM_SHIM_READ_PTR: begin
|
||||
cmd_data_out <= addr;
|
||||
cmd_finished <= 1;
|
||||
end
|
||||
endcase else begin
|
||||
cmd_finished <= 0;
|
||||
end
|
||||
end
|
||||
|
||||
/* Block RAM FIFO controller. */
|
||||
|
||||
reg read_enable = 0;
|
||||
reg write_enable = 0;
|
||||
reg [DAT_WID-1:0] write_dat = 0;
|
||||
wire [DAT_WID-1:0] read_dat;
|
||||
wire empty;
|
||||
wire full;
|
||||
ram_fifo #(
|
||||
.DAT_WID(DAT_WID)
|
||||
) pre_fifo (
|
||||
.clk(clk),
|
||||
.rst(rst),
|
||||
.read_enable(read_enable),
|
||||
.write_enable(write_enable),
|
||||
.write_dat(write_dat),
|
||||
.read_dat(read_dat),
|
||||
.empty(empty),
|
||||
.full(full)
|
||||
);
|
||||
|
||||
/* Code to take data from Block RAM and put it into System RAM. */
|
||||
|
||||
localparam WAIT_ON_EMPTY = 0;
|
||||
localparam READ_OFF_FIFO = 1;
|
||||
localparam HIGH_WORD_LOAD = 2;
|
||||
localparam WAIT_ON_HIGH_WORD = 3;
|
||||
reg [1:0] writestate = WAIT_ON_EMPTY;
|
||||
|
||||
/* Originally the simulation code checked if the intermediate FIFO was
|
||||
* empty, and then stopped running the simulation. This led to an off
|
||||
* by one error where the very last value pushed was not read. Instead,
|
||||
* the simulator now checks for steady-ness, which means that the always
|
||||
* block has idled at the WAIT_ON_EMPTY state for two cycles.
|
||||
*/
|
||||
`ifdef RAM_SHIM_DEBUG
|
||||
reg [1:0] prev_writestate;
|
||||
always @ (posedge clk) prev_writestate <= writestate;
|
||||
assign fifo_steady = prev_writestate == WAIT_ON_EMPTY && writestate == WAIT_ON_EMPTY;
|
||||
`endif
|
||||
|
||||
always @ (posedge clk) begin
|
||||
case (state)
|
||||
WAIT_ON_COMMIT: if (commit) begin
|
||||
word <= data[RAM_WORD-1:0];
|
||||
case (writestate)
|
||||
WAIT_ON_EMPTY: if (!empty) begin
|
||||
writestate <= READ_OFF_FIFO;
|
||||
/* This value is raised at the beginning of the
|
||||
* next clock cycle. A read takes one clock cycle, so
|
||||
* the next clock cycle has to disarm read_enable, and
|
||||
* then the cycle *after that* must read the data from
|
||||
* the FIFO.
|
||||
*/
|
||||
read_enable <= 1;
|
||||
end
|
||||
READ_OFF_FIFO: if (read_enable) begin
|
||||
read_enable <= 0;
|
||||
end else begin
|
||||
word <= read_dat[RAM_WORD-1:0];
|
||||
write <= 1;
|
||||
state <= HIGH_WORD_LOAD;
|
||||
writestate <= HIGH_WORD_LOAD;
|
||||
end
|
||||
HIGH_WORD_LOAD: if (valid) begin
|
||||
offset <= offset + (RAM_WORD/2);
|
||||
if (loc_off == loc_len - 1)
|
||||
loc_off <= 0;
|
||||
else
|
||||
loc_off <= loc_off + RAM_WORD/8;
|
||||
|
||||
write <= 0;
|
||||
word <= {{(RAM_WORD*2 - DAT_WID){data[DAT_WID-1]}},
|
||||
data[DAT_WID-1:RAM_WORD]};
|
||||
state <= WAIT_ON_HIGH_WORD;
|
||||
word <= {{(RAM_WORD*2 - DAT_WID){read_dat[DAT_WID-1]}},
|
||||
read_dat[DAT_WID-1:RAM_WORD]};
|
||||
writestate <= WAIT_ON_HIGH_WORD;
|
||||
end
|
||||
WAIT_ON_HIGH_WORD: if (!write) begin
|
||||
write <= 1;
|
||||
end else if (valid) begin
|
||||
offset <= offset + (RAM_WORD / 2);
|
||||
state <= WAIT_ON_COMMIT_DEASSERT;
|
||||
finished <= 1;
|
||||
end
|
||||
WAIT_ON_COMMIT_DEASSERT: if (!commit) begin
|
||||
finished <= 0;
|
||||
if (loc_off == loc_len - 1)
|
||||
loc_off <= 0;
|
||||
else
|
||||
loc_off <= loc_off + RAM_WORD/8;
|
||||
writestate <= WAIT_ON_EMPTY;
|
||||
write <= 0;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
/* read to memory */
|
||||
always @ (posedge clk) begin
|
||||
if (data_commit && !write_enable && !full) begin
|
||||
write_dat <= data;
|
||||
write_enable <= 1;
|
||||
end else if (data_commit && write_enable) begin
|
||||
write_enable <= 0;
|
||||
finished <= 1;
|
||||
end else if (!data_commit && finished) begin
|
||||
finished <= 0;
|
||||
write_enable <= 0;
|
||||
end
|
||||
end
|
||||
|
||||
/*
|
||||
`ifdef VERILATOR
|
||||
initial begin
|
||||
$dumpfile("ram_shim.vcd");
|
||||
$dumpvars;
|
||||
end
|
||||
`endif
|
||||
*/
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
// Command opcodes for the ram_shim control interface (driven on the "cmd" port).
// The Makefile also converts this file into a C header for the simulator.
//
// NO_OP: no arm of the ram_shim command case statement handles this value.
`define RAM_SHIM_NO_OP 0
|
||||
// WRITE_LOC: latch cmd_data as the buffer start address and reset the write offset.
`define RAM_SHIM_WRITE_LOC 1
|
||||
// WRITE_LEN: latch cmd_data as the buffer length and reset the write offset.
`define RAM_SHIM_WRITE_LEN 2
|
||||
// READ_PTR: return the current write address on cmd_data_out.
`define RAM_SHIM_READ_PTR 3
|
||||
// Width in bits of the "cmd" port.
`define RAM_SHIM_CMD_WID 8
|
|
@ -0,0 +1,207 @@
|
|||
#include <array>
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <limits>
#include <memory>
#include <random>

#include <unistd.h>
#include <verilated.h>

#include "ram_shim_cmds.h"
#include "Vram_shim.h"
|
||||
using ModType = Vram_shim;
|
||||
ModType *mod;
|
||||
|
||||
/* Simulation time counter; run_clock() bumps it once per clock edge. */
uint32_t main_time = 0;

/* Time callback looked up by name by the Verilator runtime: reports the
 * current simulation time as a double.
 */
double sc_time_stamp() {
    const double now = static_cast<double>(main_time);
    return now;
}
|
||||
|
||||
static void run_clock() {
|
||||
for (int i = 0; i < 2; i++) {
|
||||
mod->clk = !mod->clk;
|
||||
mod->eval();
|
||||
main_time++;
|
||||
}
|
||||
}
|
||||
|
||||
static void cleanup_exit() {
|
||||
mod->final();
|
||||
delete mod;
|
||||
}
|
||||
|
||||
/* One-time simulator setup.
 *
 * Hands the command line to Verilator, enables tracing, allocates the
 * model with clk low, and registers cleanup_exit() to run at exit.
 * If the RANDOM_SEED environment variable is set, it is parsed as a
 * decimal number and used to seed rand().
 */
static void init(int argc, char **argv) {
    Verilated::commandArgs(argc, argv);
    Verilated::traceEverOn(true);

    mod = new ModType;
    mod->clk = 0;
    atexit(cleanup_exit);

    const char *seed_text = getenv("RANDOM_SEED");
    if (seed_text == NULL)
        return;

    srand((unsigned int)strtoul(seed_text, NULL, 10));
}
|
||||
|
||||
/* Drive every model input (other than clk) to zero so the design starts
 * from an idle state: no reset, no command, no data commit, no RAM ack.
 */
static void init_values() {
    /* RAM handshake and data path. */
    mod->valid = 0;
    mod->data_commit = 0;
    mod->data = 0;

    /* Control interface. */
    mod->cmd_active = 0;
    mod->cmd = 0;
    mod->cmd_data = 0;

    mod->rst = 0;
}
|
||||
|
||||
using V = uint32_t;

// Verilator makes all ports unsigned, even when marked as signed in
// Verilog.

/* Sign-extend the low `len` bits of `x` to the full width of V.
 *
 * x:   value whose bit (len - 1) is treated as the sign bit.
 * len: width in bits of the signed field held in x.
 *
 * Returns x with every bit at position len and above set to 1 when the
 * sign bit is set; otherwise returns x unchanged. A zero-width field, or
 * one at least as wide as V, has nothing to extend and is returned as is
 * (this also keeps every shift count in range).
 */
V sign_extend(V x, unsigned len) {
    constexpr unsigned width = std::numeric_limits<V>::digits;
    if (len == 0 || len >= width)
        return x;

    // All arithmetic is done in V so the shifts are unsigned and cannot
    // overflow a signed int.
    const V sign_bit = V{1} << (len - 1);
    if ((x & sign_bit) == 0)
        return x;

    // low_mask selects bits [len-1:0], i.e. the field itself. Inverting
    // it selects exactly the bits above the field, which are forced to 1
    // while the field's own bits pass through from x.
    const V low_mask = (V{1} << len) - 1;
    return ~low_mask | x;
}
|
||||
/* Keep only the low n bits of x (0 <= n < 32). The unsigned literal keeps
 * the shift and subtraction out of signed arithmetic, which would overflow
 * for n == 31.
 */
#define MASK_TO(x,n) ((x) & ((1u << (n)) - 1u))
|
||||
|
||||
/* Test memory buffering and memory interface.
|
||||
* The memory interface takes one 16-bit integer at a time. The ram interface
|
||||
* runs slower than the insertion loop, to test buffering.
|
||||
*
|
||||
* The values given to the Verilog module are also stored in memory as
|
||||
* 32 bit integers. These are compared with the memory that simulates the
|
||||
* RAM interface.
|
||||
*/
|
||||
|
||||
#define MEMORY_LEN 1000 // How many 32 bit integers
// How many 16 bit parts. Parenthesized so the macro expands as a single
// value inside larger expressions (e.g. division).
#define MEMORY_LEN_16BIT (MEMORY_LEN*2)
// Byte address at which the simulated RAM buffer begins.
#define MEMORY_START 0x10241024
// Simulated RAM contents, one entry per 16 bit write.
static std::array<uint16_t, MEMORY_LEN_16BIT> backing_memory;
// Marks which 16 bit slots have already been written, to detect overwrites.
static std::array<bool, MEMORY_LEN_16BIT> backing_memory_accessed;
// Number of handle_memory() calls a write is held before it is acknowledged.
#define MEMORY_WAIT_TIME 50
|
||||
|
||||
static void handle_memory() {
|
||||
// Memory counter is used to simulate RAM delay.
|
||||
// TODO; random ram delay
|
||||
static uint32_t memory_counter = 0;
|
||||
|
||||
if (mod->write) {
|
||||
if (memory_counter == MEMORY_WAIT_TIME) {
|
||||
mod->valid = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
if (memory_counter == 0) {
|
||||
uint32_t memory_access_ind = 0;
|
||||
|
||||
assert(mod->addr >= MEMORY_START);
|
||||
memory_access_ind = mod->addr - MEMORY_START;
|
||||
|
||||
// Addresses are bytes, but writes are always 16 bits.
|
||||
// Ensure we are writing to a 16 bit boundary.
|
||||
assert(memory_access_ind % 2 == 0);
|
||||
memory_access_ind /= 2;
|
||||
|
||||
// Check to make sure that the RAM interface is not overwriting
|
||||
// memory locations. For now, it should not do that.
|
||||
assert(!backing_memory_accessed[memory_access_ind]);
|
||||
backing_memory_accessed[memory_access_ind] = true;
|
||||
|
||||
assert(memory_access_ind < MEMORY_LEN_16BIT);
|
||||
backing_memory[memory_access_ind] = mod->word;
|
||||
// printf("RAM end: %x @ %d\n", backing_memory[memory_access_ind], memory_access_ind);
|
||||
}
|
||||
memory_counter++;
|
||||
} else {
|
||||
mod->valid = 0;
|
||||
assert(memory_counter == MEMORY_WAIT_TIME || memory_counter == 0);
|
||||
memory_counter = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Program the shim's buffer through its command interface: first the
 * length (MEMORY_LEN), then the start address (MEMORY_START).
 */
static void init_memory() {
    // Issue one command and complete the handshake: hold cmd_active until
    // the model reports cmd_finished, then release it for one clock.
    const auto issue = [](uint32_t payload, uint32_t opcode) {
        mod->cmd_data = payload;
        mod->cmd = opcode;

        mod->cmd_active = 1;
        for (; !mod->cmd_finished; )
            run_clock();
        mod->cmd_active = 0;
        run_clock();
    };

    issue(MEMORY_LEN, RAM_SHIM_WRITE_LEN);
    issue(MEMORY_START, RAM_SHIM_WRITE_LOC);
}
|
||||
|
||||
static std::array<uint32_t, MEMORY_LEN> generated_memory;
|
||||
constexpr int CYCLE_WAIT = 10;
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
init(argc, argv);
|
||||
init_values();
|
||||
init_memory();
|
||||
|
||||
/* Every CYCLE_WAIT cycles, push one value to RAM.
|
||||
* This should be smaller than the amount of time it takes for
|
||||
* the ram to "process" the added value.
|
||||
*/
|
||||
int i = 0;
|
||||
int cntr = 0;
|
||||
while (i < MEMORY_LEN) {
|
||||
run_clock();
|
||||
handle_memory();
|
||||
|
||||
if (cntr == CYCLE_WAIT) {
|
||||
if (!mod->finished && !mod->data_commit) {
|
||||
generated_memory[i] = sign_extend(MASK_TO(rand(), 24), 24);
|
||||
// printf("Sending: %d, %x\n", i, generated_memory[i]);
|
||||
mod->data = generated_memory[i];
|
||||
mod->data_commit = 1;
|
||||
} else if (mod->finished && mod->data_commit) {
|
||||
mod->data_commit = 0;
|
||||
i++;
|
||||
cntr = 0;
|
||||
}
|
||||
} else {
|
||||
cntr++;
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(stderr, "Waiting on bram\n");
|
||||
while (!mod->fifo_steady) {
|
||||
run_clock();
|
||||
handle_memory();
|
||||
}
|
||||
handle_memory();
|
||||
fprintf(stderr, "Bram complete\n");
|
||||
|
||||
for (i = 0; i < MEMORY_LEN_16BIT; i+=2) {
|
||||
uint32_t nv = (uint32_t)backing_memory[i+1] << 16 | backing_memory[i];
|
||||
if (generated_memory[i/2] != nv) {
|
||||
fprintf(stderr, "%d: %x != %x\n", i, generated_memory[i/2], nv);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue