mirror of https://github.com/YosysHQ/picorv32.git
Improvements in picorv32_pcpi_mul
This commit is contained in:
parent
923ac360ff
commit
1f99de5117
|
@ -194,6 +194,9 @@ CPI numbers for a core built without ENABLE_REGS_DUALPORT.
|
||||||
| indirect jump (jalr) | 6 | 6 |
|
| indirect jump (jalr) | 6 | 6 |
|
||||||
| shift operations | 4-14 | 4-15 |
|
| shift operations | 4-14 | 4-15 |
|
||||||
|
|
||||||
|
When `ENABLE_MUL` is activated, then a `MUL` instruction will execute
|
||||||
|
in 42 cycles and a `MULH[SU|U]` instruction will execute in 74 cycles.
|
||||||
|
|
||||||
Dhrystone benchmark results: 0.309 DMIPS/MHz (544 Dhrystones/Second/MHz)
|
Dhrystone benchmark results: 0.309 DMIPS/MHz (544 Dhrystones/Second/MHz)
|
||||||
|
|
||||||
For the Dhrystone benchmark the average CPI is 4.167.
|
For the Dhrystone benchmark the average CPI is 4.167.
|
||||||
|
|
|
@ -292,6 +292,10 @@ start:
|
||||||
/* break */
|
/* break */
|
||||||
sbreak
|
sbreak
|
||||||
|
|
||||||
|
|
||||||
|
/* Hard mul functions for multest.c
|
||||||
|
**********************************/
|
||||||
|
|
||||||
hard_mul:
|
hard_mul:
|
||||||
mul a0, a0, a1
|
mul a0, a0, a1
|
||||||
ret
|
ret
|
||||||
|
|
63
picorv32.v
63
picorv32.v
|
@ -660,12 +660,6 @@ module picorv32 #(
|
||||||
reg_pc <= current_pc;
|
reg_pc <= current_pc;
|
||||||
reg_next_pc <= current_pc;
|
reg_next_pc <= current_pc;
|
||||||
|
|
||||||
if (WITH_PCPI) begin
|
|
||||||
pcpi_insn_valid <= 0;
|
|
||||||
pcpi_rs1_valid <= 0;
|
|
||||||
pcpi_rs2_valid <= 0;
|
|
||||||
end
|
|
||||||
|
|
||||||
latched_store <= 0;
|
latched_store <= 0;
|
||||||
latched_stalu <= 0;
|
latched_stalu <= 0;
|
||||||
latched_branch <= 0;
|
latched_branch <= 0;
|
||||||
|
@ -726,6 +720,10 @@ module picorv32 #(
|
||||||
reg_sh <= decoded_rs2 ? cpuregs[decoded_rs2] : 0;
|
reg_sh <= decoded_rs2 ? cpuregs[decoded_rs2] : 0;
|
||||||
reg_op2 <= decoded_rs2 ? cpuregs[decoded_rs2] : 0;
|
reg_op2 <= decoded_rs2 ? cpuregs[decoded_rs2] : 0;
|
||||||
if (pcpi_int_ready) begin
|
if (pcpi_int_ready) begin
|
||||||
|
mem_do_rinst <= 1;
|
||||||
|
pcpi_insn_valid <= 0;
|
||||||
|
pcpi_rs1_valid <= 0;
|
||||||
|
pcpi_rs2_valid <= 0;
|
||||||
reg_out <= pcpi_int_rd;
|
reg_out <= pcpi_int_rd;
|
||||||
latched_store <= pcpi_int_rd_valid;
|
latched_store <= pcpi_int_rd_valid;
|
||||||
cpu_state <= cpu_state_fetch;
|
cpu_state <= cpu_state_fetch;
|
||||||
|
@ -848,6 +846,10 @@ module picorv32 #(
|
||||||
if (WITH_PCPI && pcpi_insn_valid) begin
|
if (WITH_PCPI && pcpi_insn_valid) begin
|
||||||
pcpi_rs2_valid <= 1;
|
pcpi_rs2_valid <= 1;
|
||||||
if (pcpi_int_ready) begin
|
if (pcpi_int_ready) begin
|
||||||
|
mem_do_rinst <= 1;
|
||||||
|
pcpi_insn_valid <= 0;
|
||||||
|
pcpi_rs1_valid <= 0;
|
||||||
|
pcpi_rs2_valid <= 0;
|
||||||
reg_out <= pcpi_int_rd;
|
reg_out <= pcpi_int_rd;
|
||||||
latched_store <= pcpi_int_rd_valid;
|
latched_store <= pcpi_int_rd_valid;
|
||||||
cpu_state <= cpu_state_fetch;
|
cpu_state <= cpu_state_fetch;
|
||||||
|
@ -1023,7 +1025,10 @@ endmodule
|
||||||
* picorv32_pcpi_mul
|
* picorv32_pcpi_mul
|
||||||
***************************************************************/
|
***************************************************************/
|
||||||
|
|
||||||
module picorv32_pcpi_mul (
|
module picorv32_pcpi_mul #(
|
||||||
|
// increasing this parameter increases performance and core size
|
||||||
|
parameter STEPS_AT_ONCE = 1
|
||||||
|
) (
|
||||||
input clk, resetn,
|
input clk, resetn,
|
||||||
|
|
||||||
input pcpi_insn_valid,
|
input pcpi_insn_valid,
|
||||||
|
@ -1067,9 +1072,32 @@ module picorv32_pcpi_mul (
|
||||||
end
|
end
|
||||||
|
|
||||||
reg [63:0] rs1, rs2, rd, rdx;
|
reg [63:0] rs1, rs2, rd, rdx;
|
||||||
|
reg [63:0] next_rs1, next_rs2, next_rd, next_rdx, next_rdt;
|
||||||
reg [6:0] mul_counter;
|
reg [6:0] mul_counter;
|
||||||
reg mul_waiting;
|
reg mul_waiting;
|
||||||
reg mul_finish;
|
reg mul_finish;
|
||||||
|
integer i;
|
||||||
|
|
||||||
|
// carry save accumulator: combinationally derives next_rd/next_rdx/next_rs1/next_rs2 from rd/rdx/rs1/rs2 by applying STEPS_AT_ONCE accumulate-and-shift steps
|
||||||
|
always @* begin
|
||||||
|
next_rd = rd; // start from the current register values
|
||||||
|
next_rdx = rdx;
|
||||||
|
next_rs1 = rs1;
|
||||||
|
next_rs2 = rs2;
|
||||||
|
|
||||||
|
for (i = 0; i < STEPS_AT_ONCE; i=i+1) begin // one step per iteration; each step looks at bit 0 of next_rs1
|
||||||
|
if (next_rs1[0]) begin // bit set: fold next_rs2 into the next_rd/next_rdx pair
|
||||||
|
next_rdt = (next_rd ^ next_rdx) ^ next_rs2; // bitwise sum of the three operands, no carry propagation
|
||||||
|
next_rdx = ((next_rd & next_rdx) | (next_rd & next_rs2) | (next_rdx & next_rs2)) << 1; // bitwise majority of the three operands (the carries), moved up one bit position
|
||||||
|
end else begin // bit clear: next_rs2 is left out of this step
|
||||||
|
next_rdt = next_rd ^ next_rdx; // bitwise sum of the two accumulator words
|
||||||
|
next_rdx = (next_rd & next_rdx) << 1; // their carries, moved up one bit position
|
||||||
|
end
|
||||||
|
next_rd = next_rdt; // next_rdt is a temporary written in both branches above so that next_rdx can be computed from the old next_rd
|
||||||
|
next_rs1 = next_rs1 >> 1; // expose the next bit of next_rs1 at bit 0
|
||||||
|
next_rs2 = next_rs2 << 1; // shift next_rs2 up one bit for the following step
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
mul_finish <= 0;
|
mul_finish <= 0;
|
||||||
|
@ -1089,21 +1117,16 @@ module picorv32_pcpi_mul (
|
||||||
|
|
||||||
rd <= 0;
|
rd <= 0;
|
||||||
rdx <= 0;
|
rdx <= 0;
|
||||||
mul_counter <= instr_any_mulh ? 64 : 32;
|
mul_counter <= (instr_any_mulh ? 63 - STEPS_AT_ONCE : 31 - STEPS_AT_ONCE);
|
||||||
mul_waiting <= !mul_start;
|
mul_waiting <= !mul_start;
|
||||||
end else begin
|
end else begin
|
||||||
// carry save accumulator
|
rd <= next_rd;
|
||||||
if (rs1[0]) begin
|
rdx <= next_rdx;
|
||||||
rd <= rd ^ rdx ^ rs2;
|
rs1 <= next_rs1;
|
||||||
rdx <= ((rd & rdx) | (rd & rs2) | (rdx & rs2)) << 1;
|
rs2 <= next_rs2;
|
||||||
end else begin
|
|
||||||
rd <= rd ^ rdx;
|
mul_counter <= mul_counter - STEPS_AT_ONCE;
|
||||||
rdx <= (rd & rdx) << 1;
|
if (mul_counter[6]) begin
|
||||||
end
|
|
||||||
rs1 <= rs1 >> 1;
|
|
||||||
rs2 <= rs2 << 1;
|
|
||||||
mul_counter <= mul_counter - 1;
|
|
||||||
if (!mul_counter) begin
|
|
||||||
mul_finish <= 1;
|
mul_finish <= 1;
|
||||||
mul_waiting <= 1;
|
mul_waiting <= 1;
|
||||||
end
|
end
|
||||||
|
|
|
@ -244,10 +244,12 @@ module testbench;
|
||||||
$finish;
|
$finish;
|
||||||
end
|
end
|
||||||
|
|
||||||
|
integer cycle_counter;
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
|
cycle_counter <= resetn ? cycle_counter + 1 : 0;
|
||||||
if (resetn && trap) begin
|
if (resetn && trap) begin
|
||||||
repeat (10) @(posedge clk);
|
repeat (10) @(posedge clk);
|
||||||
$display("TRAP");
|
$display("TRAP after %1d clock cycles", cycle_counter);
|
||||||
$finish;
|
$finish;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue