mirror of https://github.com/YosysHQ/picorv32.git
Added TWO_CYCLE_ALU parameter
This commit is contained in:
parent
a97a715987
commit
dd30b57ea6
12
README.md
12
README.md
|
@ -171,6 +171,18 @@ This relaxes the longest data path a bit by adding an additional FF stage
|
|||
at the cost of adding an additional clock cycle delay to the conditional
|
||||
branch instructions.
|
||||
|
||||
*Note: Enabling this parameter will be most effective when retiming (aka
|
||||
"register balancing") is enabled in the synthesis flow.*
|
||||
|
||||
#### TWO_CYCLE_ALU (default = 0)
|
||||
|
||||
This adds an additional FF stage in the ALU data path, improving timing
|
||||
at the cost of an additional clock cycle for all instructions that use
|
||||
the ALU.
|
||||
|
||||
*Note: Enabling this parameter will be most effective when retiming (aka
|
||||
"register balancing") is enabled in the synthesis flow.*
|
||||
|
||||
#### CATCH_MISALIGN (default = 1)
|
||||
|
||||
Set this to 0 to disable the circuitry for catching misaligned memory
|
||||
|
|
176
picorv32.v
176
picorv32.v
|
@ -38,6 +38,7 @@ module picorv32 #(
|
|||
parameter [ 0:0] LATCHED_MEM_RDATA = 0,
|
||||
parameter [ 0:0] TWO_STAGE_SHIFT = 1,
|
||||
parameter [ 0:0] TWO_CYCLE_COMPARE = 0,
|
||||
parameter [ 0:0] TWO_CYCLE_ALU = 0,
|
||||
parameter [ 0:0] CATCH_MISALIGN = 1,
|
||||
parameter [ 0:0] CATCH_ILLINSN = 1,
|
||||
parameter [ 0:0] ENABLE_PCPI = 0,
|
||||
|
@ -530,46 +531,104 @@ module picorv32 #(
|
|||
reg [31:0] next_irq_pending;
|
||||
reg do_waitirq;
|
||||
|
||||
reg [31:0] alu_out, reg_alu_out;
|
||||
reg alu_out_0, reg_alu_out_0;
|
||||
reg alu_wait;
|
||||
reg [31:0] alu_out, alu_out_q;
|
||||
reg alu_out_0, alu_out_0_q;
|
||||
reg alu_wait, alu_wait_2;
|
||||
|
||||
always @* begin
|
||||
alu_out_0 = 'bx;
|
||||
(* parallel_case, full_case *)
|
||||
case (1'b1)
|
||||
instr_beq:
|
||||
alu_out_0 = reg_op1 == reg_op2;
|
||||
instr_bne:
|
||||
alu_out_0 = reg_op1 != reg_op2;
|
||||
instr_bge:
|
||||
alu_out_0 = $signed(reg_op1) >= $signed(reg_op2);
|
||||
instr_bgeu:
|
||||
alu_out_0 = reg_op1 >= reg_op2;
|
||||
is_slti_blt_slt:
|
||||
alu_out_0 = $signed(reg_op1) < $signed(reg_op2);
|
||||
is_sltiu_bltu_sltu:
|
||||
alu_out_0 = reg_op1 < reg_op2;
|
||||
endcase
|
||||
|
||||
alu_out = 'bx;
|
||||
(* parallel_case, full_case *)
|
||||
case (1'b1)
|
||||
is_lui_auipc_jal_jalr_addi_add:
|
||||
alu_out = reg_op1 + reg_op2;
|
||||
instr_sub:
|
||||
alu_out = reg_op1 - reg_op2;
|
||||
is_compare:
|
||||
alu_out = alu_out_0;
|
||||
instr_xori || instr_xor:
|
||||
alu_out = reg_op1 ^ reg_op2;
|
||||
instr_ori || instr_or:
|
||||
alu_out = reg_op1 | reg_op2;
|
||||
instr_andi || instr_and:
|
||||
alu_out = reg_op1 & reg_op2;
|
||||
endcase
|
||||
end
|
||||
|
||||
generate if (TWO_CYCLE_ALU) begin:two_cycle_alu
|
||||
reg [31:0] alu_add_sub;
|
||||
reg [31:0] alu_xor_or_and;
|
||||
reg alu_eq, alu_ltu, alu_lts;
|
||||
|
||||
always @(posedge clk) begin
|
||||
alu_add_sub <= instr_sub ? reg_op1 - reg_op2 : reg_op1 + reg_op2;
|
||||
|
||||
alu_xor_or_and = 'bx;
|
||||
(* parallel_case, full_case *)
|
||||
case (1'b1)
|
||||
instr_xori || instr_xor:
|
||||
alu_xor_or_and = reg_op1 ^ reg_op2;
|
||||
instr_ori || instr_or:
|
||||
alu_xor_or_and = reg_op1 | reg_op2;
|
||||
instr_andi || instr_and:
|
||||
alu_xor_or_and = reg_op1 & reg_op2;
|
||||
endcase
|
||||
|
||||
alu_eq <= reg_op1 == reg_op2;
|
||||
alu_lts <= $signed(reg_op1) < $signed(reg_op2);
|
||||
alu_ltu <= reg_op1 < reg_op2;
|
||||
end
|
||||
|
||||
always @* begin
|
||||
alu_out_0 = 'bx;
|
||||
(* parallel_case, full_case *)
|
||||
case (1'b1)
|
||||
instr_beq:
|
||||
alu_out_0 = alu_eq;
|
||||
instr_bne:
|
||||
alu_out_0 = !alu_eq;
|
||||
instr_bge:
|
||||
alu_out_0 = !alu_lts;
|
||||
instr_bgeu:
|
||||
alu_out_0 = !alu_ltu;
|
||||
is_slti_blt_slt:
|
||||
alu_out_0 = alu_lts;
|
||||
is_sltiu_bltu_sltu:
|
||||
alu_out_0 = alu_ltu;
|
||||
endcase
|
||||
|
||||
alu_out = 'bx;
|
||||
(* parallel_case, full_case *)
|
||||
case (1'b1)
|
||||
is_lui_auipc_jal_jalr_addi_add || instr_sub:
|
||||
alu_out = alu_add_sub;
|
||||
is_compare:
|
||||
alu_out = alu_out_0;
|
||||
|{instr_xori, instr_xor, instr_ori, instr_or, instr_andi, instr_and}:
|
||||
alu_out = alu_xor_or_and;
|
||||
endcase
|
||||
end
|
||||
end else begin:one_cycle_alu
|
||||
always @* begin
|
||||
alu_out_0 = 'bx;
|
||||
(* parallel_case, full_case *)
|
||||
case (1'b1)
|
||||
instr_beq:
|
||||
alu_out_0 = reg_op1 == reg_op2;
|
||||
instr_bne:
|
||||
alu_out_0 = reg_op1 != reg_op2;
|
||||
instr_bge:
|
||||
alu_out_0 = $signed(reg_op1) >= $signed(reg_op2);
|
||||
instr_bgeu:
|
||||
alu_out_0 = reg_op1 >= reg_op2;
|
||||
is_slti_blt_slt:
|
||||
alu_out_0 = $signed(reg_op1) < $signed(reg_op2);
|
||||
is_sltiu_bltu_sltu:
|
||||
alu_out_0 = reg_op1 < reg_op2;
|
||||
endcase
|
||||
|
||||
alu_out = 'bx;
|
||||
(* parallel_case, full_case *)
|
||||
case (1'b1)
|
||||
is_lui_auipc_jal_jalr_addi_add:
|
||||
alu_out = reg_op1 + reg_op2;
|
||||
instr_sub:
|
||||
alu_out = reg_op1 - reg_op2;
|
||||
is_compare:
|
||||
alu_out = alu_out_0;
|
||||
instr_xori || instr_xor:
|
||||
alu_out = reg_op1 ^ reg_op2;
|
||||
instr_ori || instr_or:
|
||||
alu_out = reg_op1 | reg_op2;
|
||||
instr_andi || instr_and:
|
||||
alu_out = reg_op1 & reg_op2;
|
||||
endcase
|
||||
end
|
||||
end endgenerate
|
||||
|
||||
always @(posedge clk) begin
|
||||
trap <= 0;
|
||||
reg_sh <= 'bx;
|
||||
|
@ -578,9 +637,11 @@ module picorv32 #(
|
|||
set_mem_do_rdata = 0;
|
||||
set_mem_do_wdata = 0;
|
||||
|
||||
reg_alu_out <= alu_out;
|
||||
reg_alu_out_0 <= alu_out_0;
|
||||
alu_out_0_q <= alu_out_0;
|
||||
alu_out_q <= alu_out;
|
||||
|
||||
alu_wait <= 0;
|
||||
alu_wait_2 <= 0;
|
||||
|
||||
if (WITH_PCPI && CATCH_ILLINSN) begin
|
||||
if (resetn && pcpi_valid && !pcpi_int_wait) begin
|
||||
|
@ -646,13 +707,13 @@ module picorv32 #(
|
|||
(* parallel_case *)
|
||||
case (1'b1)
|
||||
latched_branch: begin
|
||||
current_pc = latched_store ? (latched_stalu ? reg_alu_out : reg_out) : reg_next_pc;
|
||||
current_pc = latched_store ? (latched_stalu ? alu_out_q : reg_out) : reg_next_pc;
|
||||
`debug($display("ST_RD: %2d 0x%08x, BRANCH 0x%08x", latched_rd, reg_pc + 4, current_pc);)
|
||||
cpuregs[latched_rd] <= reg_pc + 4;
|
||||
end
|
||||
latched_store && !latched_branch: begin
|
||||
`debug($display("ST_RD: %2d 0x%08x", latched_rd, latched_stalu ? reg_alu_out : reg_out);)
|
||||
cpuregs[latched_rd] <= latched_stalu ? reg_alu_out : reg_out;
|
||||
`debug($display("ST_RD: %2d 0x%08x", latched_rd, latched_stalu ? alu_out_q : reg_out);)
|
||||
cpuregs[latched_rd] <= latched_stalu ? alu_out_q : reg_out;
|
||||
end
|
||||
ENABLE_IRQ && irq_state[0]: begin
|
||||
cpuregs[latched_rd] <= current_pc;
|
||||
|
@ -775,7 +836,10 @@ module picorv32 #(
|
|||
is_lui_auipc_jal: begin
|
||||
reg_op1 <= instr_lui ? 0 : reg_pc;
|
||||
reg_op2 <= decoded_imm;
|
||||
mem_do_rinst <= mem_do_prefetch;
|
||||
if (TWO_CYCLE_ALU)
|
||||
alu_wait <= 1;
|
||||
else
|
||||
mem_do_rinst <= mem_do_prefetch;
|
||||
cpu_state <= cpu_state_exec;
|
||||
end
|
||||
ENABLE_IRQ && ENABLE_IRQ_QREGS && instr_getq: begin
|
||||
|
@ -830,7 +894,10 @@ module picorv32 #(
|
|||
`debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, decoded_rs1 ? cpuregs[decoded_rs1] : 0);)
|
||||
reg_op1 <= decoded_rs1 ? cpuregs[decoded_rs1] : 0;
|
||||
reg_op2 <= decoded_imm;
|
||||
mem_do_rinst <= mem_do_prefetch;
|
||||
if (TWO_CYCLE_ALU)
|
||||
alu_wait <= 1;
|
||||
else
|
||||
mem_do_rinst <= mem_do_prefetch;
|
||||
cpu_state <= cpu_state_exec;
|
||||
end
|
||||
default: begin
|
||||
|
@ -850,9 +917,10 @@ module picorv32 #(
|
|||
cpu_state <= cpu_state_shift;
|
||||
end
|
||||
default: begin
|
||||
if (TWO_CYCLE_COMPARE && is_beq_bne_blt_bge_bltu_bgeu)
|
||||
if (TWO_CYCLE_ALU || (TWO_CYCLE_COMPARE && is_beq_bne_blt_bge_bltu_bgeu)) begin
|
||||
alu_wait_2 <= TWO_CYCLE_ALU && (TWO_CYCLE_COMPARE && is_beq_bne_blt_bge_bltu_bgeu);
|
||||
alu_wait <= 1;
|
||||
else
|
||||
end else
|
||||
mem_do_rinst <= mem_do_prefetch;
|
||||
cpu_state <= cpu_state_exec;
|
||||
end
|
||||
|
@ -896,9 +964,10 @@ module picorv32 #(
|
|||
cpu_state <= cpu_state_shift;
|
||||
end
|
||||
default: begin
|
||||
if (TWO_CYCLE_COMPARE && is_beq_bne_blt_bge_bltu_bgeu)
|
||||
if (TWO_CYCLE_ALU || (TWO_CYCLE_COMPARE && is_beq_bne_blt_bge_bltu_bgeu)) begin
|
||||
alu_wait_2 <= TWO_CYCLE_ALU && (TWO_CYCLE_COMPARE && is_beq_bne_blt_bge_bltu_bgeu);
|
||||
alu_wait <= 1;
|
||||
else
|
||||
end else
|
||||
mem_do_rinst <= mem_do_prefetch;
|
||||
cpu_state <= cpu_state_exec;
|
||||
end
|
||||
|
@ -906,17 +975,18 @@ module picorv32 #(
|
|||
end
|
||||
|
||||
cpu_state_exec: begin
|
||||
latched_store <= TWO_CYCLE_COMPARE ? reg_alu_out_0 : alu_out_0;
|
||||
latched_branch <= TWO_CYCLE_COMPARE ? reg_alu_out_0 : alu_out_0;
|
||||
latched_store <= TWO_CYCLE_COMPARE ? alu_out_0_q : alu_out_0;
|
||||
latched_branch <= TWO_CYCLE_COMPARE ? alu_out_0_q : alu_out_0;
|
||||
reg_out <= reg_pc + decoded_imm;
|
||||
if (TWO_CYCLE_COMPARE && alu_wait) begin
|
||||
mem_do_rinst <= mem_do_prefetch;
|
||||
if ((TWO_CYCLE_ALU || TWO_CYCLE_COMPARE) && (alu_wait || alu_wait_2)) begin
|
||||
mem_do_rinst <= mem_do_prefetch && !alu_wait_2;
|
||||
alu_wait <= alu_wait_2;
|
||||
end else
|
||||
if (is_beq_bne_blt_bge_bltu_bgeu) begin
|
||||
latched_rd <= 0;
|
||||
if (mem_done)
|
||||
cpu_state <= cpu_state_fetch;
|
||||
if (TWO_CYCLE_COMPARE ? reg_alu_out_0 : alu_out_0) begin
|
||||
if (TWO_CYCLE_COMPARE ? alu_out_0_q : alu_out_0) begin
|
||||
decoder_trigger <= 0;
|
||||
set_mem_do_rinst = 1;
|
||||
end
|
||||
|
@ -1187,6 +1257,7 @@ module picorv32_axi #(
|
|||
parameter [ 0:0] ENABLE_REGS_DUALPORT = 1,
|
||||
parameter [ 0:0] TWO_STAGE_SHIFT = 1,
|
||||
parameter [ 0:0] TWO_CYCLE_COMPARE = 0,
|
||||
parameter [ 0:0] TWO_CYCLE_ALU = 0,
|
||||
parameter [ 0:0] CATCH_MISALIGN = 1,
|
||||
parameter [ 0:0] CATCH_ILLINSN = 1,
|
||||
parameter [ 0:0] ENABLE_PCPI = 0,
|
||||
|
@ -1283,6 +1354,7 @@ module picorv32_axi #(
|
|||
.ENABLE_REGS_DUALPORT(ENABLE_REGS_DUALPORT),
|
||||
.TWO_STAGE_SHIFT (TWO_STAGE_SHIFT ),
|
||||
.TWO_CYCLE_COMPARE (TWO_CYCLE_COMPARE ),
|
||||
.TWO_CYCLE_ALU (TWO_CYCLE_ALU ),
|
||||
.CATCH_MISALIGN (CATCH_MISALIGN ),
|
||||
.CATCH_ILLINSN (CATCH_ILLINSN ),
|
||||
.ENABLE_PCPI (ENABLE_PCPI ),
|
||||
|
|
Loading…
Reference in New Issue