cpu: Add initial NEORV32 support (From out of tree prototyping in litex_neorv32_test repo).

- Only configured for rv32i for now (need to create variants).
- I/DBus interfaces probably not optimal (latency).
- Converted from VHDL to Verilog through GHDL-Yosys-Synth (should also support direct VHDL use with toolchains supporting it).
- Interrupts not yet implemented.
- Running in simulation with litex_sim --cpu-type=neorv32.
- Running on Arty with: python3 -m litex_boards.targets.digilent_arty --cpu-type=neorv32 --build --load:
        __   _ __      _  __
       / /  (_) /____ | |/_/
      / /__/ / __/ -_)>  <
     /____/_/\__/\__/_/|_|
   Build your hardware, easily!

 (c) Copyright 2012-2022 Enjoy-Digital
 (c) Copyright 2007-2015 M-Labs

 BIOS built on Feb 14 2022 16:10:24
 BIOS CRC passed (83edf3c3)

 Migen git sha1: ac70301
 LiteX git sha1: 0d218306

--=============== SoC ==================--
CPU:		NEORV32 @ 100MHz
BUS:		WISHBONE 32-bit @ 4GiB
CSR:		32-bit data
ROM:		128KiB
SRAM:		8KiB
L2:		8KiB
SDRAM:		524288KiB 16-bit @ 800MT/s (CL-7 CWL-5)

--========== Initialization ============--
Initializing SDRAM @0x40000000...
Switching SDRAM to software control.
Read leveling:
  m0, b00: |00000000000000000000000000000000| delays: -
  m0, b01: |11000000000000000000000000000000| delays: 01+-01
  m0, b02: |00011111111111111000000000000000| delays: 10+-07
  m0, b03: |00000000000000000000111111111111| delays: 26+-06
  m0, b04: |00000000000000000000000000000000| delays: -
  m0, b05: |00000000000000000000000000000000| delays: -
  m0, b06: |00000000000000000000000000000000| delays: -
  m0, b07: |00000000000000000000000000000000| delays: -
  best: m0, b02 delays: 10+-07
  m1, b00: |00000000000000000000000000000000| delays: -
  m1, b01: |10000000000000000000000000000000| delays: 00+-00
  m1, b02: |00111111111111111000000000000000| delays: 09+-07
  m1, b03: |00000000000000000001111111111111| delays: 25+-06
  m1, b04: |00000000000000000000000000000000| delays: -
  m1, b05: |00000000000000000000000000000000| delays: -
  m1, b06: |00000000000000000000000000000000| delays: -
  m1, b07: |00000000000000000000000000000000| delays: -
  best: m1, b02 delays: 09+-07
Switching SDRAM to hardware control.
Memtest at 0x40000000 (2.0MiB)...
  Write: 0x40000000-0x40200000 2.0MiB
   Read: 0x40000000-0x40200000 2.0MiB
Memtest OK
Memspeed at 0x40000000 (Sequential, 2.0MiB)...
  Write speed: 9.3MiB/s
   Read speed: 13.2MiB/s

--============== Boot ==================--
Booting from serial...
Press Q or ESC to abort boot completely.
sL5DdSMmkekro
Timeout
No boot medium found

--============= Console ================--

litex>
This commit is contained in:
Florent Kermarrec 2022-02-17 09:50:24 +01:00
parent d37ef60e70
commit 38a047bed1
7 changed files with 448 additions and 0 deletions

View file

@ -0,0 +1 @@
from litex.soc.cores.cpu.neorv32.core import NEORV32

View file

@ -0,0 +1,4 @@
/*
 * boot_helper: transfer control to the address held in x13.
 *
 * x13 is register a3 in the RISC-V ABI naming, so when called from C the
 * jump target is presumably the fourth integer argument (verify against the
 * C prototype, which is not visible here). `jr` does not write a link
 * register and the other registers are left untouched, so whatever the
 * caller placed in them is still there when the target starts executing.
 */
.section .text, "ax", @progbits
.global boot_helper
boot_helper:
jr x13

View file

@ -0,0 +1,194 @@
#
# This file is part of LiteX.
#
# Copyright (c) 2022 Florent Kermarrec <florent@enjoy-digital.fr>
# SPDX-License-Identifier: BSD-2-Clause
import os
from migen import *
from litex.soc.interconnect import wishbone
from litex.soc.cores.cpu import CPU, CPU_GCC_TRIPLE_RISCV32
# Variants -----------------------------------------------------------------------------------------

# Only a single variant exists for now; it is also the default of NEORV32.__init__.
CPU_VARIANTS = ["standard"]

# GCC Flags ----------------------------------------------------------------------------------------

# Compiler -march/-mabi flags, keyed by CPU variant name. The legend below shows which letter of
# the ISA string selects which extension.
GCC_FLAGS = {
    #                       /-------- Base ISA
    #                       |/------- Hardware Multiply + Divide
    #                       ||/----- Atomics
    #                       |||/---- Compressed ISA
    #                       ||||/--- Single-Precision Floating-Point
    #                       |||||/-- Double-Precision Floating-Point
    #                       imacfd
    "standard": "-march=rv32i -mabi=ilp32",
}
# NEORV32 ------------------------------------------------------------------------------------------
class NEORV32(CPU):
    """LiteX integration of the NEORV32 RISC-V CPU.

    The class instantiates the ``neorv32_cpu_wrapper`` entity, adapts its instruction and data
    bus handshakes to two Wishbone masters (``ibus``/``dbus``) and registers the Verilog netlist
    produced from the VHDL sources through GHDL/Yosys.

    Limitations visible in this file: a single ``standard`` (rv32i) variant, all interrupt and
    time inputs tied to 0, and a reset address that must be 0x00000000.
    """
    family               = "riscv"
    name                 = "neorv32"
    human_name           = "NEORV32"
    variants             = CPU_VARIANTS
    data_width           = 32
    endianness           = "little"
    gcc_triple           = CPU_GCC_TRIPLE_RISCV32
    linker_output_format = "elf32-littleriscv"
    nop                  = "nop"
    io_regions           = {0x80000000: 0x80000000} # Origin, Length.

    # GCC Flags.
    @property
    def gcc_flags(self):
        """Return the GCC flags of the selected variant plus the ``__neorv32__`` define."""
        flags =  GCC_FLAGS[self.variant]
        flags += " -D__neorv32__ "
        return flags

    def __init__(self, platform, variant="standard"):
        """Create the CPU instance and its bus adaptation logic.

        Parameters
        ----------
        platform : build platform; the generated Verilog netlist is added to it.
        variant  : key of ``GCC_FLAGS`` selecting the CPU variant.
        """
        self.platform     = platform
        self.variant      = variant
        self.reset        = Signal()
        self.ibus         = ibus = wishbone.Interface()
        self.dbus         = dbus = wishbone.Interface()
        self.periph_buses = [ibus, dbus] # Peripheral buses (Connected to main SoC's bus).
        self.memory_buses = []           # Memory buses (Connected directly to LiteDRAM).

        # # #

        # Placeholder signal type for CPU outputs that are deliberately left unused.
        class Open(Signal) : pass

        # IBus Adaptations. FIXME: Works but not optimal (latency).
        # The CPU's we/re request is registered into Wishbone cyc/stb and held until ack.
        ibus_we = Signal()
        ibus_re = Signal()
        self.sync += [
            # Clear Cyc/Stb on Ack.
            If(ibus.ack,
                ibus.cyc.eq(0),
                ibus.stb.eq(0),
            ),
            # Set Cyc/Stb on We/Re (listed last, so it wins if both conditions are true).
            If(ibus_we | ibus_re,
                ibus.cyc.eq(1),
                ibus.stb.eq(1),
                ibus.we.eq(ibus_we)
            )
        ]

        # DBus Adaptations. FIXME: Works but not optimal (latency).
        # Same scheme as the IBus above.
        dbus_we = Signal()
        dbus_re = Signal()
        self.sync += [
            # Clear Cyc/Stb on Ack.
            If(dbus.ack,
                dbus.cyc.eq(0),
                dbus.stb.eq(0),
            ),
            # Set Cyc/Stb on We/Re (listed last, so it wins if both conditions are true).
            If(dbus_we | dbus_re,
                dbus.cyc.eq(1),
                dbus.stb.eq(1),
                dbus.we.eq(dbus_we)
            )
        ]

        # CPU Instance.
        # NOTE(review): the VHDL wrapper declares *_bus_priv_o as 2 bits, time_i as 64 bits and
        # firq_i as 16 bits, while Open() is 1 bit wide and the inputs are the bare constant 0 -
        # confirm the resulting width handling is acceptable for the targeted toolchains.
        self.specials += Instance("neorv32_cpu_wrapper",
            # Global Control.
            i_clk_i         = ClockSignal("sys"),
            i_rstn_i        = ~(ResetSignal() | self.reset), # Active-low reset.
            o_sleep_o       = Open(),
            o_debug_o       = Open(),
            i_db_halt_req_i = 0,

            # Instruction Bus.
            # The two LSBs of the CPU address go to a dummy signal; the rest drives ibus.adr.
            o_i_bus_addr_o  = Cat(Signal(2), ibus.adr),
            i_i_bus_rdata_i = ibus.dat_r,
            o_i_bus_wdata_o = ibus.dat_w,
            o_i_bus_ben_o   = ibus.sel,
            o_i_bus_we_o    = ibus_we,
            o_i_bus_re_o    = ibus_re,
            o_i_bus_lock_o  = Open(), # FIXME.
            i_i_bus_ack_i   = ibus.ack,
            i_i_bus_err_i   = ibus.err,
            o_i_bus_fence_o = Open(), # FIXME.
            o_i_bus_priv_o  = Open(), # FIXME.

            # Data Bus.
            # The two LSBs of the CPU address go to a dummy signal; the rest drives dbus.adr.
            o_d_bus_addr_o  = Cat(Signal(2), dbus.adr),
            i_d_bus_rdata_i = dbus.dat_r,
            o_d_bus_wdata_o = dbus.dat_w,
            o_d_bus_ben_o   = dbus.sel,
            o_d_bus_we_o    = dbus_we,
            o_d_bus_re_o    = dbus_re,
            o_d_bus_lock_o  = Open(), # FIXME.
            i_d_bus_ack_i   = dbus.ack,
            i_d_bus_err_i   = dbus.err,
            o_d_bus_fence_o = Open(), # FIXME.
            o_d_bus_priv_o  = Open(), # FIXME.

            # System Time.
            i_time_i        = 0, # FIXME.

            # Interrupts.
            i_msw_irq_i     = 0, # FIXME.
            i_mext_irq_i    = 0, # FIXME.
            i_mtime_irq_i   = 0, # FIXME.
            i_firq_i        = 0  # FIXME.
        )

        # Add Verilog sources
        self.add_sources(platform)

    def set_reset_address(self, reset_address):
        """Record the reset address; only 0x00000000 is accepted."""
        self.reset_address = reset_address
        assert reset_address == 0x00000000

    @staticmethod
    def add_sources(platform):
        """Fetch the NEORV32 VHDL sources, convert them to Verilog and add the result to ``platform``.

        Sources missing from this directory are downloaded with ``wget``; the full list is then
        synthesized to ``neorv32.v`` by Yosys with the GHDL plugin.

        Raises
        ------
        OSError : when a source cannot be downloaded or the VHDL to Verilog conversion fails.
        """
        # Function-scope imports: only needed while generating the netlist.
        import subprocess
        from litex.build import tools

        # One absolute directory is used for every path below.
        cdir = os.path.abspath(os.path.dirname(__file__))

        # List VHDL sources.
        sources = [
            "neorv32_package.vhd",          # Main CPU & Processor package file.
            "neorv32_fifo.vhd",             # FIFO.
            "neorv32_cpu.vhd",              # CPU top entity.
            "neorv32_cpu_alu.vhd",          # Arithmetic/logic unit.
            "neorv32_cpu_cp_bitmanip.vhd",  # Bit-manipulation co-processor.
            "neorv32_cpu_cp_cfu.vhd",       # Custom instructions co-processor.
            "neorv32_cpu_cp_fpu.vhd",       # Single-precision FPU co-processor.
            "neorv32_cpu_cp_muldiv.vhd",    # Integer multiplier/divider co-processor.
            "neorv32_cpu_cp_shifter.vhd",   # Base ISA shifter unit.
            "neorv32_cpu_bus.vhd",          # Instruction and data bus interface unit.
            "neorv32_cpu_control.vhd",      # CPU control and CSR system.
            "neorv32_cpu_decompressor.vhd", # Compressed instructions decoder.
            "neorv32_cpu_regfile.vhd",      # Data register file.
            "neorv32_cpu_wrapper.vhd",      # CPU top entity + default generics.
        ]

        # Download VHDL sources (if not already present).
        # An argument list avoids shell quoting issues, and the exit status is checked so that a
        # failed download is reported here instead of surfacing later as a GHDL/Yosys error.
        for source in sources:
            if not os.path.exists(os.path.join(cdir, source)):
                url = f"https://raw.githubusercontent.com/stnolting/neorv32/main/rtl/core/{source}"
                if subprocess.call(["wget", url, "-P", cdir]):
                    raise OSError(f"Unable to download {source} from {url}, please check your network access and wget install.")

        # Convert VHDL to Verilog through GHDL/Yosys.
        # The trailing backslashes join the ghdl command and its file list into one script line.
        verilog_path = os.path.join(cdir, "neorv32.v")
        script_path  = os.path.join(cdir, "neorv32.ys")
        ys = ["ghdl --ieee=synopsys -fexplicit -frelaxed-rules --std=08 --work=neorv32 \\"]
        ys += [os.path.join(cdir, source) + " \\" for source in sources]
        ys += [
            "-e neorv32_cpu_wrapper",
            "chformal -assert -remove",
            "write_verilog {}".format(verilog_path),
        ]
        tools.write_to_file(script_path, "\n".join(ys))
        if subprocess.call(["yosys", "-q", "-m", "ghdl", script_path]):
            raise OSError("Unable to convert NEORV32 CPU to verilog, please check your GHDL-Yosys-plugin install.")
        platform.add_source(verilog_path)

    def do_finalize(self):
        """Check that ``set_reset_address`` was called before finalization."""
        assert hasattr(self, "reset_address")

View file

@ -0,0 +1,75 @@
// Startup code: sets up the stack and trap vector, initializes .data and .bss,
// enables the machine external interrupt source and calls main().
// _fstack, _fdata, _edata, _fdata_rom, _fbss and _ebss are not defined here;
// presumably they are provided by the linker script.

// Bit 11 of the mie CSR (machine external interrupt enable).
#define MIE_MEIE 0x800
.global _start
_start:
j reset_vector
reset_vector:
// stack pointer and trap handler address
la sp, _fstack
la t0, trap_vector
csrw mtvec, t0
// initialize .data
// word-by-word copy from _fdata_rom (t2) to [_fdata, _edata) (t0..t1)
la t0, _fdata
la t1, _edata
la t2, _fdata_rom
1: beq t0, t1, 2f
lw t3, 0(t2)
sw t3, 0(t0)
addi t0, t0, 4
addi t2, t2, 4
j 1b
2:
// initialize .bss
// word-by-word zero fill of [_fbss, _ebss)
la t0, _fbss
la t1, _ebss
1: beq t0, t1, 3f
sw zero, 0(t0)
addi t0, t0, 4
j 1b
3:
// enable external interrupts
// only the MEIE bit of mie is set; mstatus is not written by this startup code
li t0, MIE_MEIE
csrs mie, t0
call main
// spin forever if main() returns
1: j 1b
// Trap entry (address written to mtvec by reset_vector).
// Saves ra, t0-t6 and a0-a7 in a 16-word stack frame, calls isr(), restores
// the same registers and returns from the trap with mret. The remaining
// registers (s0-s11, gp, tp) are not saved here.
trap_vector:
// allocate the 16-word frame
addi sp, sp, -16*4
sw ra, 0*4(sp)
sw t0, 1*4(sp)
sw t1, 2*4(sp)
sw t2, 3*4(sp)
sw a0, 4*4(sp)
sw a1, 5*4(sp)
sw a2, 6*4(sp)
sw a3, 7*4(sp)
sw a4, 8*4(sp)
sw a5, 9*4(sp)
sw a6, 10*4(sp)
sw a7, 11*4(sp)
sw t3, 12*4(sp)
sw t4, 13*4(sp)
sw t5, 14*4(sp)
sw t6, 15*4(sp)
// isr is defined elsewhere
call isr
// restore in the same slot order as the saves above
lw ra, 0*4(sp)
lw t0, 1*4(sp)
lw t1, 2*4(sp)
lw t2, 3*4(sp)
lw a0, 4*4(sp)
lw a1, 5*4(sp)
lw a2, 6*4(sp)
lw a3, 7*4(sp)
lw a4, 8*4(sp)
lw a5, 9*4(sp)
lw a6, 10*4(sp)
lw a7, 11*4(sp)
lw t3, 12*4(sp)
lw t4, 13*4(sp)
lw t5, 14*4(sp)
lw t6, 15*4(sp)
// release the frame and return from the trap
addi sp, sp, 16*4
mret

View file

@ -0,0 +1,4 @@
#ifndef __IRQ_H
#define __IRQ_H
/* Placeholder header: no interrupt helpers are declared for this CPU yet
 * (interrupt support is not implemented in this initial NEORV32 port). */
#endif /* __IRQ_H */

View file

@ -0,0 +1,151 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library neorv32;
use neorv32.neorv32_package.all;
-- Wrapper entity around neorv32_cpu: exposes the CPU's ports unchanged and gives
-- every generic a default value, so the entity can be elaborated as a top level
-- without any generic map.
-- NOTE(review): CPU_EXTENSION_RISCV_M and CPU_EXTENSION_RISCV_U default to true
-- here; confirm this matches the ISA the software is compiled for.
entity neorv32_cpu_wrapper is
generic (
-- General --
HW_THREAD_ID : natural := 0; -- hardware thread id (32-bit)
CPU_BOOT_ADDR : std_ulogic_vector(31 downto 0) := x"00000000"; -- cpu boot address
CPU_DEBUG_ADDR : std_ulogic_vector(31 downto 0) := x"00000000"; -- cpu debug mode start address
-- RISC-V CPU Extensions --
CPU_EXTENSION_RISCV_A : boolean := false; -- implement atomic extension?
CPU_EXTENSION_RISCV_B : boolean := false; -- implement bit-manipulation extension?
CPU_EXTENSION_RISCV_C : boolean := false; -- implement compressed extension?
CPU_EXTENSION_RISCV_E : boolean := false; -- implement embedded RF extension?
CPU_EXTENSION_RISCV_M : boolean := true; -- implement mul/div extension?
CPU_EXTENSION_RISCV_U : boolean := true; -- implement user mode extension?
CPU_EXTENSION_RISCV_Zfinx : boolean := false; -- implement 32-bit floating-point extension (using INT reg!)
CPU_EXTENSION_RISCV_Zicsr : boolean := true; -- implement CSR system?
CPU_EXTENSION_RISCV_Zicntr : boolean := true; -- implement base counters?
CPU_EXTENSION_RISCV_Zihpm : boolean := false; -- implement hardware performance monitors?
CPU_EXTENSION_RISCV_Zifencei : boolean := false; -- implement instruction stream sync.?
CPU_EXTENSION_RISCV_Zmmul : boolean := false; -- implement multiply-only M sub-extension?
CPU_EXTENSION_RISCV_Zxcfu : boolean := false; -- implement custom (instr.) functions unit?
CPU_EXTENSION_RISCV_DEBUG : boolean := false; -- implement CPU debug mode?
-- Extension Options --
FAST_MUL_EN : boolean := true; -- use DSPs for M extension's multiplier
FAST_SHIFT_EN : boolean := true; -- use barrel shifter for shift operations
CPU_CNT_WIDTH : natural := 32; -- total width of CPU cycle and instret counters (0..64)
CPU_IPB_ENTRIES : natural := 4; -- entries in instruction prefetch buffer, has to be a power of 2
-- Physical Memory Protection (PMP) --
PMP_NUM_REGIONS : natural := 4; -- number of regions (0..64)
PMP_MIN_GRANULARITY : natural := 8; -- minimal region granularity in bytes, has to be a power of 2, min 8 bytes
-- Hardware Performance Monitors (HPM) --
HPM_NUM_CNTS : natural := 0; -- number of implemented HPM counters (0..29)
HPM_CNT_WIDTH : natural := 32 -- total size of HPM counters (0..64)
);
port (
-- global control --
clk_i : in std_ulogic; -- global clock, rising edge
rstn_i : in std_ulogic; -- global reset, low-active, async
sleep_o : out std_ulogic; -- cpu is in sleep mode when set
debug_o : out std_ulogic; -- cpu is in debug mode when set
-- instruction bus interface --
i_bus_addr_o : out std_ulogic_vector(data_width_c-1 downto 0); -- bus access address
i_bus_rdata_i : in std_ulogic_vector(data_width_c-1 downto 0); -- bus read data
i_bus_wdata_o : out std_ulogic_vector(data_width_c-1 downto 0); -- bus write data
i_bus_ben_o : out std_ulogic_vector(03 downto 0); -- byte enable
i_bus_we_o : out std_ulogic; -- write enable
i_bus_re_o : out std_ulogic; -- read enable
i_bus_lock_o : out std_ulogic; -- exclusive access request
i_bus_ack_i : in std_ulogic; -- bus transfer acknowledge
i_bus_err_i : in std_ulogic; -- bus transfer error
i_bus_fence_o : out std_ulogic; -- executed FENCEI operation
i_bus_priv_o : out std_ulogic_vector(1 downto 0); -- privilege level
-- data bus interface --
d_bus_addr_o : out std_ulogic_vector(data_width_c-1 downto 0); -- bus access address
d_bus_rdata_i : in std_ulogic_vector(data_width_c-1 downto 0); -- bus read data
d_bus_wdata_o : out std_ulogic_vector(data_width_c-1 downto 0); -- bus write data
d_bus_ben_o : out std_ulogic_vector(03 downto 0); -- byte enable
d_bus_we_o : out std_ulogic; -- write enable
d_bus_re_o : out std_ulogic; -- read enable
d_bus_lock_o : out std_ulogic; -- exclusive access request
d_bus_ack_i : in std_ulogic; -- bus transfer acknowledge
d_bus_err_i : in std_ulogic; -- bus transfer error
d_bus_fence_o : out std_ulogic; -- executed FENCE operation
d_bus_priv_o : out std_ulogic_vector(1 downto 0); -- privilege level
-- system time input from MTIME --
time_i : in std_ulogic_vector(63 downto 0); -- current system time
-- interrupts (risc-v compliant) --
msw_irq_i : in std_ulogic;-- machine software interrupt
mext_irq_i : in std_ulogic;-- machine external interrupt
mtime_irq_i : in std_ulogic;-- machine timer interrupt
-- fast interrupts (custom) --
firq_i : in std_ulogic_vector(15 downto 0);
-- debug mode (halt) request --
db_halt_req_i : in std_ulogic
);
end neorv32_cpu_wrapper;
-- Pass-through architecture: instantiates neorv32_cpu and forwards every generic
-- and every port of the wrapper entity one-to-one, without any additional logic.
-- The neorv32_cpu component declaration is not in this file; presumably it comes
-- from neorv32_package.
architecture neorv32_cpu_wrapper_rtl of neorv32_cpu_wrapper is
begin
neorv32_cpu_inst: neorv32_cpu
-- generics: same names on both sides
generic map (
HW_THREAD_ID => HW_THREAD_ID ,
CPU_BOOT_ADDR => CPU_BOOT_ADDR ,
CPU_DEBUG_ADDR => CPU_DEBUG_ADDR ,
CPU_EXTENSION_RISCV_A => CPU_EXTENSION_RISCV_A ,
CPU_EXTENSION_RISCV_B => CPU_EXTENSION_RISCV_B ,
CPU_EXTENSION_RISCV_C => CPU_EXTENSION_RISCV_C ,
CPU_EXTENSION_RISCV_E => CPU_EXTENSION_RISCV_E ,
CPU_EXTENSION_RISCV_M => CPU_EXTENSION_RISCV_M ,
CPU_EXTENSION_RISCV_U => CPU_EXTENSION_RISCV_U ,
CPU_EXTENSION_RISCV_Zfinx => CPU_EXTENSION_RISCV_Zfinx ,
CPU_EXTENSION_RISCV_Zicsr => CPU_EXTENSION_RISCV_Zicsr ,
CPU_EXTENSION_RISCV_Zicntr => CPU_EXTENSION_RISCV_Zicntr ,
CPU_EXTENSION_RISCV_Zihpm => CPU_EXTENSION_RISCV_Zihpm ,
CPU_EXTENSION_RISCV_Zifencei => CPU_EXTENSION_RISCV_Zifencei,
CPU_EXTENSION_RISCV_Zmmul => CPU_EXTENSION_RISCV_Zmmul ,
CPU_EXTENSION_RISCV_Zxcfu => CPU_EXTENSION_RISCV_Zxcfu ,
CPU_EXTENSION_RISCV_DEBUG => CPU_EXTENSION_RISCV_DEBUG ,
FAST_MUL_EN => FAST_MUL_EN ,
FAST_SHIFT_EN => FAST_SHIFT_EN ,
CPU_CNT_WIDTH => CPU_CNT_WIDTH ,
CPU_IPB_ENTRIES => CPU_IPB_ENTRIES ,
PMP_NUM_REGIONS => PMP_NUM_REGIONS ,
PMP_MIN_GRANULARITY => PMP_MIN_GRANULARITY ,
HPM_NUM_CNTS => HPM_NUM_CNTS ,
HPM_CNT_WIDTH => HPM_CNT_WIDTH
)
-- ports: same names on both sides
port map (
clk_i => clk_i ,
rstn_i => rstn_i ,
sleep_o => sleep_o ,
debug_o => debug_o ,
i_bus_addr_o => i_bus_addr_o ,
i_bus_rdata_i => i_bus_rdata_i,
i_bus_wdata_o => i_bus_wdata_o,
i_bus_ben_o => i_bus_ben_o ,
i_bus_we_o => i_bus_we_o ,
i_bus_re_o => i_bus_re_o ,
i_bus_lock_o => i_bus_lock_o ,
i_bus_ack_i => i_bus_ack_i ,
i_bus_err_i => i_bus_err_i ,
i_bus_fence_o => i_bus_fence_o,
i_bus_priv_o => i_bus_priv_o ,
d_bus_addr_o => d_bus_addr_o ,
d_bus_rdata_i => d_bus_rdata_i,
d_bus_wdata_o => d_bus_wdata_o,
d_bus_ben_o => d_bus_ben_o ,
d_bus_we_o => d_bus_we_o ,
d_bus_re_o => d_bus_re_o ,
d_bus_lock_o => d_bus_lock_o ,
d_bus_ack_i => d_bus_ack_i ,
d_bus_err_i => d_bus_err_i ,
d_bus_fence_o => d_bus_fence_o,
d_bus_priv_o => d_bus_priv_o ,
time_i => time_i ,
msw_irq_i => msw_irq_i ,
mext_irq_i => mext_irq_i ,
mtime_irq_i => mtime_irq_i ,
firq_i => firq_i ,
db_halt_req_i => db_halt_req_i
);
end neorv32_cpu_wrapper_rtl;

View file

@ -0,0 +1,19 @@
#ifndef __SYSTEM_H
#define __SYSTEM_H
#ifdef __cplusplus
extern "C" {
#endif
/* No instruction cache: flushing is a no-op. */
__attribute__((unused)) static void flush_cpu_icache(void){}
/* No data cache: flushing is a no-op. */
__attribute__((unused)) static void flush_cpu_dcache(void){}
/* The functions below are only declared here; their definitions are not in this header. */
/* Presumably flushes the SoC L2 cache - confirm against the implementation. */
void flush_l2_cache(void);
/* Busy-wait delay; the parameter name suggests a duration in milliseconds. */
void busy_wait(unsigned int ms);
/* Busy-wait delay; the parameter name suggests a duration in microseconds. */
void busy_wait_us(unsigned int us);
#ifdef __cplusplus
}
#endif
#endif /* __SYSTEM_H */