mirror of https://github.com/YosysHQ/picorv32.git
Added BARREL_SHIFTER config parameter
This commit is contained in:
parent
0e9bdd0082
commit
2fdafb9c16
24
README.md
24
README.md
|
@ -3,7 +3,7 @@ PicoRV32 - A Size-Optimized RISC-V CPU
|
||||||
======================================
|
======================================
|
||||||
|
|
||||||
PicoRV32 is a CPU core that implements the [RISC-V RV32IMC Instruction Set](http://riscv.org/).
|
PicoRV32 is a CPU core that implements the [RISC-V RV32IMC Instruction Set](http://riscv.org/).
|
||||||
It can be configured to be a RV32E, RV32I, RV32IC, RV32IM, or RV32IMC core, and optionally
|
It can be configured as RV32E, RV32I, RV32IC, RV32IM, or RV32IMC core, and optionally
|
||||||
contains a built-in interrupt controller.
|
contains a built-in interrupt controller.
|
||||||
|
|
||||||
Tools (gcc, binutils, etc..) can be obtained via the [RISC-V Website](http://riscv.org/download.html#tab_tools).
|
Tools (gcc, binutils, etc..) can be obtained via the [RISC-V Website](http://riscv.org/download.html#tab_tools).
|
||||||
|
@ -29,7 +29,7 @@ PicoRV32 is free and open hardware licensed under the [ISC license](http://en.wi
|
||||||
Features and Typical Applications
|
Features and Typical Applications
|
||||||
---------------------------------
|
---------------------------------
|
||||||
|
|
||||||
- Small (750-1700 LUTs in 7-Series Xilinx Architecture)
|
- Small (750-2000 LUTs in 7-Series Xilinx Architecture)
|
||||||
- High f<sub>max</sub> (250-450 MHz on 7-Series Xilinx FPGAs)
|
- High f<sub>max</sub> (250-450 MHz on 7-Series Xilinx FPGAs)
|
||||||
- Selectable native memory interface or AXI4-Lite master
|
- Selectable native memory interface or AXI4-Lite master
|
||||||
- Optional IRQ support (using a simple custom ISA)
|
- Optional IRQ support (using a simple custom ISA)
|
||||||
|
@ -170,6 +170,12 @@ of 4 bits and then shift in units of 1 bit. This speeds up shift operations,
|
||||||
but adds additional hardware. Set this parameter to 0 to disable the two-stage
|
but adds additional hardware. Set this parameter to 0 to disable the two-stage
|
||||||
shift to further reduce the size of the core.
|
shift to further reduce the size of the core.
|
||||||
|
|
||||||
|
#### BARREL_SHIFTER (default = 0)
|
||||||
|
|
||||||
|
By default shift operations are performed by successively shifting by a
|
||||||
|
small amount (see `TWO_STAGE_SHIFT` above). With this option set, a barrel
|
||||||
|
shifter is used instead.
|
||||||
|
|
||||||
#### TWO_CYCLE_COMPARE (default = 0)
|
#### TWO_CYCLE_COMPARE (default = 0)
|
||||||
|
|
||||||
This relaxes the longest data path a bit by adding an additional FF stage
|
This relaxes the longest data path a bit by adding an additional FF stage
|
||||||
|
@ -294,9 +300,15 @@ in 40 cycles and a `MULH[SU|U]` instruction will execute in 72 cycles.
|
||||||
When `ENABLE_DIV` is activated, then a `DIV[U]/REM[U]` instruction will
|
When `ENABLE_DIV` is activated, then a `DIV[U]/REM[U]` instruction will
|
||||||
execute in 40 cycles.
|
execute in 40 cycles.
|
||||||
|
|
||||||
Dhrystone benchmark results: 0.391 DMIPS/MHz (688 Dhrystones/Second/MHz)
|
When `BARREL_SHIFTER` is activated, a shift operation takes as long as
|
||||||
|
any other ALU operation.
|
||||||
|
|
||||||
For the Dhrystone benchmark the average CPI is 4.110.
|
The following Dhrystone benchmark results are for a core with enabled
|
||||||
|
`ENABLE_MUL`, `ENABLE_DIV`, and `BARREL_SHIFTER` options.
|
||||||
|
|
||||||
|
Dhrystone benchmark results: 0.399 DMIPS/MHz (702 Dhrystones/Second/MHz)
|
||||||
|
|
||||||
|
For the Dhrystone benchmark the average CPI is 4.030.
|
||||||
|
|
||||||
|
|
||||||
PicoRV32 Native Memory Interface
|
PicoRV32 Native Memory Interface
|
||||||
|
@ -586,7 +598,7 @@ once in advance.
|
||||||
Evaluation: Timing and Utilization on Xilinx 7-Series FPGAs
|
Evaluation: Timing and Utilization on Xilinx 7-Series FPGAs
|
||||||
-----------------------------------------------------------
|
-----------------------------------------------------------
|
||||||
|
|
||||||
The following evaluations have been performed with Vivado 2015.1.
|
The following evaluations have been performed with Vivado 2015.4.
|
||||||
|
|
||||||
#### Timing on Xilinx 7-Series FPGAs
|
#### Timing on Xilinx 7-Series FPGAs
|
||||||
|
|
||||||
|
@ -622,7 +634,7 @@ for the following three cores:
|
||||||
- **PicoRV32 (regular):** The `picorv32` module in its default configuration.
|
- **PicoRV32 (regular):** The `picorv32` module in its default configuration.
|
||||||
|
|
||||||
- **PicoRV32 (large):** The `picorv32` module with enabled PCPI, IRQ, MUL,
|
- **PicoRV32 (large):** The `picorv32` module with enabled PCPI, IRQ, MUL,
|
||||||
DIV, and COMPRESSED_ISA features.
|
DIV, BARREL_SHIFTER, and COMPRESSED_ISA features.
|
||||||
|
|
||||||
See `make area` in [scripts/vivado/](scripts/vivado/).
|
See `make area` in [scripts/vivado/](scripts/vivado/).
|
||||||
|
|
||||||
|
|
|
@ -27,6 +27,7 @@ module testbench;
|
||||||
wire [3:0] mem_la_wstrb;
|
wire [3:0] mem_la_wstrb;
|
||||||
|
|
||||||
picorv32 #(
|
picorv32 #(
|
||||||
|
.BARREL_SHIFTER(1),
|
||||||
.ENABLE_MUL(1),
|
.ENABLE_MUL(1),
|
||||||
.ENABLE_DIV(1)
|
.ENABLE_DIV(1)
|
||||||
) uut (
|
) uut (
|
||||||
|
|
26
picorv32.v
26
picorv32.v
|
@ -43,6 +43,7 @@ module picorv32 #(
|
||||||
parameter [ 0:0] ENABLE_REGS_DUALPORT = 1,
|
parameter [ 0:0] ENABLE_REGS_DUALPORT = 1,
|
||||||
parameter [ 0:0] LATCHED_MEM_RDATA = 0,
|
parameter [ 0:0] LATCHED_MEM_RDATA = 0,
|
||||||
parameter [ 0:0] TWO_STAGE_SHIFT = 1,
|
parameter [ 0:0] TWO_STAGE_SHIFT = 1,
|
||||||
|
parameter [ 0:0] BARREL_SHIFTER = 0,
|
||||||
parameter [ 0:0] TWO_CYCLE_COMPARE = 0,
|
parameter [ 0:0] TWO_CYCLE_COMPARE = 0,
|
||||||
parameter [ 0:0] TWO_CYCLE_ALU = 0,
|
parameter [ 0:0] TWO_CYCLE_ALU = 0,
|
||||||
parameter [ 0:0] COMPRESSED_ISA = 0,
|
parameter [ 0:0] COMPRESSED_ISA = 0,
|
||||||
|
@ -889,6 +890,7 @@ module picorv32 #(
|
||||||
reg alu_wait, alu_wait_2;
|
reg alu_wait, alu_wait_2;
|
||||||
|
|
||||||
reg [31:0] alu_add_sub;
|
reg [31:0] alu_add_sub;
|
||||||
|
reg [31:0] alu_shl, alu_shr;
|
||||||
reg alu_eq, alu_ltu, alu_lts;
|
reg alu_eq, alu_ltu, alu_lts;
|
||||||
|
|
||||||
generate if (TWO_CYCLE_ALU) begin
|
generate if (TWO_CYCLE_ALU) begin
|
||||||
|
@ -897,6 +899,8 @@ module picorv32 #(
|
||||||
alu_eq <= reg_op1 == reg_op2;
|
alu_eq <= reg_op1 == reg_op2;
|
||||||
alu_lts <= $signed(reg_op1) < $signed(reg_op2);
|
alu_lts <= $signed(reg_op1) < $signed(reg_op2);
|
||||||
alu_ltu <= reg_op1 < reg_op2;
|
alu_ltu <= reg_op1 < reg_op2;
|
||||||
|
alu_shl <= reg_op1 << reg_op2[4:0];
|
||||||
|
alu_shr <= $signed({instr_sra || instr_srai ? reg_op1[31] : 1'b0, reg_op1}) >>> reg_op2[4:0];
|
||||||
end
|
end
|
||||||
end else begin
|
end else begin
|
||||||
always @* begin
|
always @* begin
|
||||||
|
@ -904,6 +908,8 @@ module picorv32 #(
|
||||||
alu_eq = reg_op1 == reg_op2;
|
alu_eq = reg_op1 == reg_op2;
|
||||||
alu_lts = $signed(reg_op1) < $signed(reg_op2);
|
alu_lts = $signed(reg_op1) < $signed(reg_op2);
|
||||||
alu_ltu = reg_op1 < reg_op2;
|
alu_ltu = reg_op1 < reg_op2;
|
||||||
|
alu_shl = reg_op1 << reg_op2[4:0];
|
||||||
|
alu_shr = $signed({instr_sra || instr_srai ? reg_op1[31] : 1'b0, reg_op1}) >>> reg_op2[4:0];
|
||||||
end
|
end
|
||||||
end endgenerate
|
end endgenerate
|
||||||
|
|
||||||
|
@ -938,6 +944,10 @@ module picorv32 #(
|
||||||
alu_out = reg_op1 | reg_op2;
|
alu_out = reg_op1 | reg_op2;
|
||||||
instr_andi || instr_and:
|
instr_andi || instr_and:
|
||||||
alu_out = reg_op1 & reg_op2;
|
alu_out = reg_op1 & reg_op2;
|
||||||
|
BARREL_SHIFTER && (instr_sll || instr_slli):
|
||||||
|
alu_out = alu_shl;
|
||||||
|
BARREL_SHIFTER && (instr_srl || instr_srli || instr_sra || instr_srai):
|
||||||
|
alu_out = alu_shr;
|
||||||
endcase
|
endcase
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -1208,16 +1218,16 @@ module picorv32 #(
|
||||||
cpu_state <= cpu_state_ldmem;
|
cpu_state <= cpu_state_ldmem;
|
||||||
mem_do_rinst <= 1;
|
mem_do_rinst <= 1;
|
||||||
end
|
end
|
||||||
is_slli_srli_srai: begin
|
is_slli_srli_srai && !BARREL_SHIFTER: begin
|
||||||
`debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, decoded_rs1 ? cpuregs[decoded_rs1] : 0);)
|
`debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, decoded_rs1 ? cpuregs[decoded_rs1] : 0);)
|
||||||
reg_op1 <= decoded_rs1 ? cpuregs[decoded_rs1] : 0;
|
reg_op1 <= decoded_rs1 ? cpuregs[decoded_rs1] : 0;
|
||||||
reg_sh <= decoded_rs2;
|
reg_sh <= decoded_rs2;
|
||||||
cpu_state <= cpu_state_shift;
|
cpu_state <= cpu_state_shift;
|
||||||
end
|
end
|
||||||
is_jalr_addi_slti_sltiu_xori_ori_andi: begin
|
is_jalr_addi_slti_sltiu_xori_ori_andi, is_slli_srli_srai && BARREL_SHIFTER: begin
|
||||||
`debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, decoded_rs1 ? cpuregs[decoded_rs1] : 0);)
|
`debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, decoded_rs1 ? cpuregs[decoded_rs1] : 0);)
|
||||||
reg_op1 <= decoded_rs1 ? cpuregs[decoded_rs1] : 0;
|
reg_op1 <= decoded_rs1 ? cpuregs[decoded_rs1] : 0;
|
||||||
reg_op2 <= decoded_imm;
|
reg_op2 <= is_slli_srli_srai && BARREL_SHIFTER ? decoded_rs2 : decoded_imm;
|
||||||
if (TWO_CYCLE_ALU)
|
if (TWO_CYCLE_ALU)
|
||||||
alu_wait <= 1;
|
alu_wait <= 1;
|
||||||
else
|
else
|
||||||
|
@ -1237,7 +1247,7 @@ module picorv32 #(
|
||||||
cpu_state <= cpu_state_stmem;
|
cpu_state <= cpu_state_stmem;
|
||||||
mem_do_rinst <= 1;
|
mem_do_rinst <= 1;
|
||||||
end
|
end
|
||||||
is_sll_srl_sra: begin
|
is_sll_srl_sra && !BARREL_SHIFTER: begin
|
||||||
cpu_state <= cpu_state_shift;
|
cpu_state <= cpu_state_shift;
|
||||||
end
|
end
|
||||||
default: begin
|
default: begin
|
||||||
|
@ -1284,7 +1294,7 @@ module picorv32 #(
|
||||||
cpu_state <= cpu_state_stmem;
|
cpu_state <= cpu_state_stmem;
|
||||||
mem_do_rinst <= 1;
|
mem_do_rinst <= 1;
|
||||||
end
|
end
|
||||||
is_sll_srl_sra: begin
|
is_sll_srl_sra && !BARREL_SHIFTER: begin
|
||||||
cpu_state <= cpu_state_shift;
|
cpu_state <= cpu_state_shift;
|
||||||
end
|
end
|
||||||
default: begin
|
default: begin
|
||||||
|
@ -1299,8 +1309,6 @@ module picorv32 #(
|
||||||
end
|
end
|
||||||
|
|
||||||
cpu_state_exec: begin
|
cpu_state_exec: begin
|
||||||
latched_store <= TWO_CYCLE_COMPARE ? alu_out_0_q : alu_out_0;
|
|
||||||
latched_branch <= TWO_CYCLE_COMPARE ? alu_out_0_q : alu_out_0;
|
|
||||||
reg_out <= reg_pc + decoded_imm;
|
reg_out <= reg_pc + decoded_imm;
|
||||||
if ((TWO_CYCLE_ALU || TWO_CYCLE_COMPARE) && (alu_wait || alu_wait_2)) begin
|
if ((TWO_CYCLE_ALU || TWO_CYCLE_COMPARE) && (alu_wait || alu_wait_2)) begin
|
||||||
mem_do_rinst <= mem_do_prefetch && !alu_wait_2;
|
mem_do_rinst <= mem_do_prefetch && !alu_wait_2;
|
||||||
|
@ -1308,6 +1316,8 @@ module picorv32 #(
|
||||||
end else
|
end else
|
||||||
if (is_beq_bne_blt_bge_bltu_bgeu) begin
|
if (is_beq_bne_blt_bge_bltu_bgeu) begin
|
||||||
latched_rd <= 0;
|
latched_rd <= 0;
|
||||||
|
latched_store <= TWO_CYCLE_COMPARE ? alu_out_0_q : alu_out_0;
|
||||||
|
latched_branch <= TWO_CYCLE_COMPARE ? alu_out_0_q : alu_out_0;
|
||||||
if (mem_done)
|
if (mem_done)
|
||||||
cpu_state <= cpu_state_fetch;
|
cpu_state <= cpu_state_fetch;
|
||||||
if (TWO_CYCLE_COMPARE ? alu_out_0_q : alu_out_0) begin
|
if (TWO_CYCLE_COMPARE ? alu_out_0_q : alu_out_0) begin
|
||||||
|
@ -1704,6 +1714,7 @@ module picorv32_axi #(
|
||||||
parameter [ 0:0] ENABLE_REGS_16_31 = 1,
|
parameter [ 0:0] ENABLE_REGS_16_31 = 1,
|
||||||
parameter [ 0:0] ENABLE_REGS_DUALPORT = 1,
|
parameter [ 0:0] ENABLE_REGS_DUALPORT = 1,
|
||||||
parameter [ 0:0] TWO_STAGE_SHIFT = 1,
|
parameter [ 0:0] TWO_STAGE_SHIFT = 1,
|
||||||
|
parameter [ 0:0] BARREL_SHIFTER = 0,
|
||||||
parameter [ 0:0] TWO_CYCLE_COMPARE = 0,
|
parameter [ 0:0] TWO_CYCLE_COMPARE = 0,
|
||||||
parameter [ 0:0] TWO_CYCLE_ALU = 0,
|
parameter [ 0:0] TWO_CYCLE_ALU = 0,
|
||||||
parameter [ 0:0] COMPRESSED_ISA = 0,
|
parameter [ 0:0] COMPRESSED_ISA = 0,
|
||||||
|
@ -1803,6 +1814,7 @@ module picorv32_axi #(
|
||||||
.ENABLE_REGS_16_31 (ENABLE_REGS_16_31 ),
|
.ENABLE_REGS_16_31 (ENABLE_REGS_16_31 ),
|
||||||
.ENABLE_REGS_DUALPORT(ENABLE_REGS_DUALPORT),
|
.ENABLE_REGS_DUALPORT(ENABLE_REGS_DUALPORT),
|
||||||
.TWO_STAGE_SHIFT (TWO_STAGE_SHIFT ),
|
.TWO_STAGE_SHIFT (TWO_STAGE_SHIFT ),
|
||||||
|
.BARREL_SHIFTER (BARREL_SHIFTER ),
|
||||||
.TWO_CYCLE_COMPARE (TWO_CYCLE_COMPARE ),
|
.TWO_CYCLE_COMPARE (TWO_CYCLE_COMPARE ),
|
||||||
.TWO_CYCLE_ALU (TWO_CYCLE_ALU ),
|
.TWO_CYCLE_ALU (TWO_CYCLE_ALU ),
|
||||||
.COMPRESSED_ISA (COMPRESSED_ISA ),
|
.COMPRESSED_ISA (COMPRESSED_ISA ),
|
||||||
|
|
|
@ -105,6 +105,7 @@ module top_large (
|
||||||
);
|
);
|
||||||
picorv32 #(
|
picorv32 #(
|
||||||
.COMPRESSED_ISA(1),
|
.COMPRESSED_ISA(1),
|
||||||
|
.BARREL_SHIFTER(1),
|
||||||
.ENABLE_PCPI(1),
|
.ENABLE_PCPI(1),
|
||||||
.ENABLE_MUL(1),
|
.ENABLE_MUL(1),
|
||||||
.ENABLE_IRQ(1)
|
.ENABLE_IRQ(1)
|
||||||
|
|
Loading…
Reference in New Issue