From 34d9dea8c79e38c1abda85fdab5d2fcc75d6a218 Mon Sep 17 00:00:00 2001 From: Clifford Wolf Date: Sun, 7 Jun 2015 20:53:19 +0200 Subject: [PATCH] Added support for dual-port register file --- README.md | 69 ++++++++++++++++++++++++++++++++++++++++-------------- picorv32.v | 26 ++++++++++++++++---- 2 files changed, 74 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index a215843..9bfa8c3 100644 --- a/README.md +++ b/README.md @@ -43,30 +43,65 @@ memory-mapped peripherals, communicating with each other using the native interface, and communicating with the outside world via AXI4. +Parameters: +----------- + +The following Verilog module parameters can be used to configure the PicoRV32 +core. + +### ENABLE_COUNTERS (default = 1) + +This parameter enables support for the `RDCYCLE[H]`, `RDTIME[H]`, and +`RDINSTRET[H]` instructions. These instructions will cause a hardware +trap (like any other unsupported instruction) if `ENABLE_COUNTERS` is set to zero. + +*Note: Strictly speaking, the `RDCYCLE[H]`, `RDTIME[H]`, and `RDINSTRET[H]` +instructions are not optional for an RV32I core. But chances are they are not +going to be missed after the application code has been debugged and profiled. +These instructions are optional for an RV32E core.* + +### ENABLE_REGS_16_31 (default = 1) + +This parameter enables support for the registers `x16`..`x31`. The RV32E ISA +excludes these registers. However, the RV32E ISA spec requires a hardware trap +when code tries to access these registers. This is not implemented in PicoRV32. + +### ENABLE_REGS_DUALPORT (default = 1) + +The register file can be implemented with one or two read ports. A dual-ported +register file improves performance a bit, but can also increase the size of +the core. + + Performance: ------------ -The average Cycles per Instruction (CPI) is 4 to 6, depending on the mix of -instructions in the code. 
The CPI numbers for the individual instructions are: +*A short reminder: This core is optimized for size, not performance.* -| Instruction | CPI | -| ---------------------| ----:| -| direct jump (jal) | 3 | -| ALU reg + immediate | 3 | -| ALU reg + reg | 4 | -| branch (not taken) | 4 | -| memory load | 5 | -| memory store | 6 | -| branch (taken) | 6 | -| indirect jump (jalr) | 6 | -| shift operations | 4-15 | +Unless stated otherwise, the following numbers apply to a PicoRV32 with +ENABLE_REGS_DUALPORT active and connected to a memory that can accommodate +requests within one clock cycle. -Dhrystone benchmark results: 0.280 DMIPS/MHz (493 Dhrystones/Second/MHz) +The average Cycles per Instruction (CPI) is 4 to 5, depending on the mix of +instructions in the code. The CPI numbers for the individual instructions +can be found in the following table. (The column "CPI (SP)" contains the +CPI numbers for a core built without ENABLE_REGS_DUALPORT.) -For the Dryhstone benchmark the average CPI is 4.606. +| Instruction | CPI | CPI (SP) | +| ---------------------| ----:| --------:| +| direct jump (jal) | 3 | 3 | +| ALU reg + immediate | 3 | 3 | +| ALU reg + reg | 3 | 4 | +| branch (not taken) | 3 | 4 | +| memory load | 5 | 5 | +| memory store | 5 | 6 | +| branch (taken) | 5 | 6 | +| indirect jump (jalr) | 6 | 6 | +| shift operations | 4-14 | 4-15 | -*This numbers apply to systems with memory that can accomodate requests within -one clock cycle. Slower memory will degrade the performance of the processor.* +Dhrystone benchmark results: 0.309 DMIPS/MHz (544 Dhrystones/Second/MHz) + +For the Dhrystone benchmark the average CPI is 4.167. 
Todos: diff --git a/picorv32.v b/picorv32.v index d1e8e29..d490d5e 100644 --- a/picorv32.v +++ b/picorv32.v @@ -27,7 +27,8 @@ module picorv32 #( parameter ENABLE_COUNTERS = 1, - parameter ENABLE_REGS_16_31 = 1 + parameter ENABLE_REGS_16_31 = 1, + parameter ENABLE_REGS_DUALPORT = 1 ) ( input clk, resetn, output reg trap, @@ -529,6 +530,21 @@ module picorv32 #( reg_op2 <= decoded_imm; mem_do_rinst <= mem_do_prefetch; cpu_state <= cpu_state_exec; + end else if (ENABLE_REGS_DUALPORT) begin +`ifdef DEBUG + $display("LD_RS2: %2d 0x%08x", decoded_rs2, decoded_rs2 ? cpuregs[decoded_rs2] : 0); +`endif + reg_op2 <= decoded_rs2 ? cpuregs[decoded_rs2] : 0; + if (is_sb_sh_sw) begin + cpu_state <= cpu_state_stmem; + mem_do_rinst <= 1; + end else if (is_sll_srl_sra) begin + reg_sh <= decoded_rs2 ? cpuregs[decoded_rs2] : 0; + cpu_state <= cpu_state_shift; + end else begin + mem_do_rinst <= mem_do_prefetch; + cpu_state <= cpu_state_exec; + end end else cpu_state <= cpu_state_ld_rs2; end @@ -689,7 +705,8 @@ endmodule module picorv32_axi #( parameter ENABLE_COUNTERS = 1, - parameter ENABLE_REGS_16_31 = 1 + parameter ENABLE_REGS_16_31 = 1, + parameter ENABLE_REGS_DUALPORT = 1 ) ( input clk, resetn, output trap, @@ -756,8 +773,9 @@ module picorv32_axi #( ); picorv32 #( - .ENABLE_COUNTERS (ENABLE_COUNTERS ), - .ENABLE_REGS_16_31(ENABLE_REGS_16_31) + .ENABLE_COUNTERS (ENABLE_COUNTERS ), + .ENABLE_REGS_16_31 (ENABLE_REGS_16_31 ), + .ENABLE_REGS_DUALPORT(ENABLE_REGS_DUALPORT) ) picorv32_core ( .clk (clk ), .resetn (resetn ),