Added TWO_CYCLE_ALU parameter

This commit is contained in:
Clifford Wolf 2015-07-08 20:17:03 +02:00
parent a97a715987
commit dd30b57ea6
2 changed files with 136 additions and 52 deletions

View File

@ -171,6 +171,18 @@ This relaxes the longest data path a bit by adding an additional FF stage
at the cost of adding an additional clock cycle delay to the conditional at the cost of adding an additional clock cycle delay to the conditional
branch instructions. branch instructions.
*Note: Enabling this parameter will be most effective when retiming (aka
"register balancing") is enabled in the synthesis flow.*
#### TWO_CYCLE_ALU (default = 0)
This adds an additional FF stage in the ALU data path, improving timing
at the cost of an additional clock cycle for all instructions that use
the ALU.
*Note: Enabling this parameter will be most effective when retiming (aka
"register balancing") is enabled in the synthesis flow.*
#### CATCH_MISALIGN (default = 1) #### CATCH_MISALIGN (default = 1)
Set this to 0 to disable the circuitry for catching misaligned memory Set this to 0 to disable the circuitry for catching misaligned memory

View File

@ -38,6 +38,7 @@ module picorv32 #(
parameter [ 0:0] LATCHED_MEM_RDATA = 0, parameter [ 0:0] LATCHED_MEM_RDATA = 0,
parameter [ 0:0] TWO_STAGE_SHIFT = 1, parameter [ 0:0] TWO_STAGE_SHIFT = 1,
parameter [ 0:0] TWO_CYCLE_COMPARE = 0, parameter [ 0:0] TWO_CYCLE_COMPARE = 0,
parameter [ 0:0] TWO_CYCLE_ALU = 0,
parameter [ 0:0] CATCH_MISALIGN = 1, parameter [ 0:0] CATCH_MISALIGN = 1,
parameter [ 0:0] CATCH_ILLINSN = 1, parameter [ 0:0] CATCH_ILLINSN = 1,
parameter [ 0:0] ENABLE_PCPI = 0, parameter [ 0:0] ENABLE_PCPI = 0,
@ -530,46 +531,104 @@ module picorv32 #(
reg [31:0] next_irq_pending; reg [31:0] next_irq_pending;
reg do_waitirq; reg do_waitirq;
reg [31:0] alu_out, reg_alu_out; reg [31:0] alu_out, alu_out_q;
reg alu_out_0, reg_alu_out_0; reg alu_out_0, alu_out_0_q;
reg alu_wait; reg alu_wait, alu_wait_2;
always @* begin always @* begin
alu_out_0 = 'bx;
(* parallel_case, full_case *)
case (1'b1)
instr_beq:
alu_out_0 = reg_op1 == reg_op2;
instr_bne:
alu_out_0 = reg_op1 != reg_op2;
instr_bge:
alu_out_0 = $signed(reg_op1) >= $signed(reg_op2);
instr_bgeu:
alu_out_0 = reg_op1 >= reg_op2;
is_slti_blt_slt:
alu_out_0 = $signed(reg_op1) < $signed(reg_op2);
is_sltiu_bltu_sltu:
alu_out_0 = reg_op1 < reg_op2;
endcase
alu_out = 'bx;
(* parallel_case, full_case *)
case (1'b1)
is_lui_auipc_jal_jalr_addi_add:
alu_out = reg_op1 + reg_op2;
instr_sub:
alu_out = reg_op1 - reg_op2;
is_compare:
alu_out = alu_out_0;
instr_xori || instr_xor:
alu_out = reg_op1 ^ reg_op2;
instr_ori || instr_or:
alu_out = reg_op1 | reg_op2;
instr_andi || instr_and:
alu_out = reg_op1 & reg_op2;
endcase
end end
// ALU implementation, selected at elaboration time by TWO_CYCLE_ALU.
//
//   TWO_CYCLE_ALU != 0: the arithmetic, logic and compare primitives are
//       registered on posedge clk from reg_op1/reg_op2; alu_out and
//       alu_out_0 are then selected combinationally from those registers.
//   TWO_CYCLE_ALU == 0: alu_out and alu_out_0 are computed combinationally
//       straight from reg_op1/reg_op2.
//
// Both variants drive the same two outputs:
//   alu_out_0 - single-bit compare/branch condition
//   alu_out   - ALU result
generate if (TWO_CYCLE_ALU) begin:two_cycle_alu
	// Registered intermediate results (first ALU stage).
	reg [31:0] alu_add_sub;
	reg [31:0] alu_xor_or_and;
	reg alu_eq, alu_ltu, alu_lts;

	// First stage: capture every primitive result on the clock edge.
	// All assignments are non-blocking: alu_xor_or_and is read by the
	// combinational process below, whose alu_out output is sampled by
	// another posedge-clk process, so a blocking assignment here would be
	// a simulation race between clocked processes.
	always @(posedge clk) begin
		alu_add_sub <= instr_sub ? reg_op1 - reg_op2 : reg_op1 + reg_op2;

		// Default to x; the last non-blocking assignment in the process
		// wins, so a matching case item below overrides this default.
		alu_xor_or_and <= 'bx;
		(* parallel_case, full_case *)
		case (1'b1)
			instr_xori || instr_xor:
				alu_xor_or_and <= reg_op1 ^ reg_op2;
			instr_ori || instr_or:
				alu_xor_or_and <= reg_op1 | reg_op2;
			instr_andi || instr_and:
				alu_xor_or_and <= reg_op1 & reg_op2;
		endcase

		alu_eq <= reg_op1 == reg_op2;
		alu_lts <= $signed(reg_op1) < $signed(reg_op2);
		alu_ltu <= reg_op1 < reg_op2;
	end

	// Second stage: select the outputs from the registered primitives.
	always @* begin
		// x when no compare/branch instruction flag is set.
		alu_out_0 = 'bx;
		(* parallel_case, full_case *)
		case (1'b1)
			instr_beq:
				alu_out_0 = alu_eq;
			instr_bne:
				alu_out_0 = !alu_eq;
			// ">=" is derived as the negation of the registered "<".
			instr_bge:
				alu_out_0 = !alu_lts;
			instr_bgeu:
				alu_out_0 = !alu_ltu;
			is_slti_blt_slt:
				alu_out_0 = alu_lts;
			is_sltiu_bltu_sltu:
				alu_out_0 = alu_ltu;
		endcase

		// x when no ALU instruction flag is set.
		alu_out = 'bx;
		(* parallel_case, full_case *)
		case (1'b1)
			// Add and subtract share one register; instr_sub already
			// chose the operation in the clocked process above.
			is_lui_auipc_jal_jalr_addi_add || instr_sub:
				alu_out = alu_add_sub;
			is_compare:
				alu_out = alu_out_0;
			|{instr_xori, instr_xor, instr_ori, instr_or, instr_andi, instr_and}:
				alu_out = alu_xor_or_and;
		endcase
	end
end else begin:one_cycle_alu
	// Purely combinational ALU operating directly on reg_op1/reg_op2.
	always @* begin
		// x when no compare/branch instruction flag is set.
		alu_out_0 = 'bx;
		(* parallel_case, full_case *)
		case (1'b1)
			instr_beq:
				alu_out_0 = reg_op1 == reg_op2;
			instr_bne:
				alu_out_0 = reg_op1 != reg_op2;
			instr_bge:
				alu_out_0 = $signed(reg_op1) >= $signed(reg_op2);
			instr_bgeu:
				alu_out_0 = reg_op1 >= reg_op2;
			is_slti_blt_slt:
				alu_out_0 = $signed(reg_op1) < $signed(reg_op2);
			is_sltiu_bltu_sltu:
				alu_out_0 = reg_op1 < reg_op2;
		endcase

		// x when no ALU instruction flag is set.
		alu_out = 'bx;
		(* parallel_case, full_case *)
		case (1'b1)
			is_lui_auipc_jal_jalr_addi_add:
				alu_out = reg_op1 + reg_op2;
			instr_sub:
				alu_out = reg_op1 - reg_op2;
			is_compare:
				alu_out = alu_out_0;
			instr_xori || instr_xor:
				alu_out = reg_op1 ^ reg_op2;
			instr_ori || instr_or:
				alu_out = reg_op1 | reg_op2;
			instr_andi || instr_and:
				alu_out = reg_op1 & reg_op2;
		endcase
	end
end endgenerate
always @(posedge clk) begin always @(posedge clk) begin
trap <= 0; trap <= 0;
reg_sh <= 'bx; reg_sh <= 'bx;
@ -578,9 +637,11 @@ module picorv32 #(
set_mem_do_rdata = 0; set_mem_do_rdata = 0;
set_mem_do_wdata = 0; set_mem_do_wdata = 0;
reg_alu_out <= alu_out; alu_out_0_q <= alu_out_0;
reg_alu_out_0 <= alu_out_0; alu_out_q <= alu_out;
alu_wait <= 0; alu_wait <= 0;
alu_wait_2 <= 0;
if (WITH_PCPI && CATCH_ILLINSN) begin if (WITH_PCPI && CATCH_ILLINSN) begin
if (resetn && pcpi_valid && !pcpi_int_wait) begin if (resetn && pcpi_valid && !pcpi_int_wait) begin
@ -646,13 +707,13 @@ module picorv32 #(
(* parallel_case *) (* parallel_case *)
case (1'b1) case (1'b1)
latched_branch: begin latched_branch: begin
current_pc = latched_store ? (latched_stalu ? reg_alu_out : reg_out) : reg_next_pc; current_pc = latched_store ? (latched_stalu ? alu_out_q : reg_out) : reg_next_pc;
`debug($display("ST_RD: %2d 0x%08x, BRANCH 0x%08x", latched_rd, reg_pc + 4, current_pc);) `debug($display("ST_RD: %2d 0x%08x, BRANCH 0x%08x", latched_rd, reg_pc + 4, current_pc);)
cpuregs[latched_rd] <= reg_pc + 4; cpuregs[latched_rd] <= reg_pc + 4;
end end
latched_store && !latched_branch: begin latched_store && !latched_branch: begin
`debug($display("ST_RD: %2d 0x%08x", latched_rd, latched_stalu ? reg_alu_out : reg_out);) `debug($display("ST_RD: %2d 0x%08x", latched_rd, latched_stalu ? alu_out_q : reg_out);)
cpuregs[latched_rd] <= latched_stalu ? reg_alu_out : reg_out; cpuregs[latched_rd] <= latched_stalu ? alu_out_q : reg_out;
end end
ENABLE_IRQ && irq_state[0]: begin ENABLE_IRQ && irq_state[0]: begin
cpuregs[latched_rd] <= current_pc; cpuregs[latched_rd] <= current_pc;
@ -775,7 +836,10 @@ module picorv32 #(
is_lui_auipc_jal: begin is_lui_auipc_jal: begin
reg_op1 <= instr_lui ? 0 : reg_pc; reg_op1 <= instr_lui ? 0 : reg_pc;
reg_op2 <= decoded_imm; reg_op2 <= decoded_imm;
mem_do_rinst <= mem_do_prefetch; if (TWO_CYCLE_ALU)
alu_wait <= 1;
else
mem_do_rinst <= mem_do_prefetch;
cpu_state <= cpu_state_exec; cpu_state <= cpu_state_exec;
end end
ENABLE_IRQ && ENABLE_IRQ_QREGS && instr_getq: begin ENABLE_IRQ && ENABLE_IRQ_QREGS && instr_getq: begin
@ -830,7 +894,10 @@ module picorv32 #(
`debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, decoded_rs1 ? cpuregs[decoded_rs1] : 0);) `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, decoded_rs1 ? cpuregs[decoded_rs1] : 0);)
reg_op1 <= decoded_rs1 ? cpuregs[decoded_rs1] : 0; reg_op1 <= decoded_rs1 ? cpuregs[decoded_rs1] : 0;
reg_op2 <= decoded_imm; reg_op2 <= decoded_imm;
mem_do_rinst <= mem_do_prefetch; if (TWO_CYCLE_ALU)
alu_wait <= 1;
else
mem_do_rinst <= mem_do_prefetch;
cpu_state <= cpu_state_exec; cpu_state <= cpu_state_exec;
end end
default: begin default: begin
@ -850,9 +917,10 @@ module picorv32 #(
cpu_state <= cpu_state_shift; cpu_state <= cpu_state_shift;
end end
default: begin default: begin
if (TWO_CYCLE_COMPARE && is_beq_bne_blt_bge_bltu_bgeu) if (TWO_CYCLE_ALU || (TWO_CYCLE_COMPARE && is_beq_bne_blt_bge_bltu_bgeu)) begin
alu_wait_2 <= TWO_CYCLE_ALU && (TWO_CYCLE_COMPARE && is_beq_bne_blt_bge_bltu_bgeu);
alu_wait <= 1; alu_wait <= 1;
else end else
mem_do_rinst <= mem_do_prefetch; mem_do_rinst <= mem_do_prefetch;
cpu_state <= cpu_state_exec; cpu_state <= cpu_state_exec;
end end
@ -896,9 +964,10 @@ module picorv32 #(
cpu_state <= cpu_state_shift; cpu_state <= cpu_state_shift;
end end
default: begin default: begin
if (TWO_CYCLE_COMPARE && is_beq_bne_blt_bge_bltu_bgeu) if (TWO_CYCLE_ALU || (TWO_CYCLE_COMPARE && is_beq_bne_blt_bge_bltu_bgeu)) begin
alu_wait_2 <= TWO_CYCLE_ALU && (TWO_CYCLE_COMPARE && is_beq_bne_blt_bge_bltu_bgeu);
alu_wait <= 1; alu_wait <= 1;
else end else
mem_do_rinst <= mem_do_prefetch; mem_do_rinst <= mem_do_prefetch;
cpu_state <= cpu_state_exec; cpu_state <= cpu_state_exec;
end end
@ -906,17 +975,18 @@ module picorv32 #(
end end
cpu_state_exec: begin cpu_state_exec: begin
latched_store <= TWO_CYCLE_COMPARE ? reg_alu_out_0 : alu_out_0; latched_store <= TWO_CYCLE_COMPARE ? alu_out_0_q : alu_out_0;
latched_branch <= TWO_CYCLE_COMPARE ? reg_alu_out_0 : alu_out_0; latched_branch <= TWO_CYCLE_COMPARE ? alu_out_0_q : alu_out_0;
reg_out <= reg_pc + decoded_imm; reg_out <= reg_pc + decoded_imm;
if (TWO_CYCLE_COMPARE && alu_wait) begin if ((TWO_CYCLE_ALU || TWO_CYCLE_COMPARE) && (alu_wait || alu_wait_2)) begin
mem_do_rinst <= mem_do_prefetch; mem_do_rinst <= mem_do_prefetch && !alu_wait_2;
alu_wait <= alu_wait_2;
end else end else
if (is_beq_bne_blt_bge_bltu_bgeu) begin if (is_beq_bne_blt_bge_bltu_bgeu) begin
latched_rd <= 0; latched_rd <= 0;
if (mem_done) if (mem_done)
cpu_state <= cpu_state_fetch; cpu_state <= cpu_state_fetch;
if (TWO_CYCLE_COMPARE ? reg_alu_out_0 : alu_out_0) begin if (TWO_CYCLE_COMPARE ? alu_out_0_q : alu_out_0) begin
decoder_trigger <= 0; decoder_trigger <= 0;
set_mem_do_rinst = 1; set_mem_do_rinst = 1;
end end
@ -1187,6 +1257,7 @@ module picorv32_axi #(
parameter [ 0:0] ENABLE_REGS_DUALPORT = 1, parameter [ 0:0] ENABLE_REGS_DUALPORT = 1,
parameter [ 0:0] TWO_STAGE_SHIFT = 1, parameter [ 0:0] TWO_STAGE_SHIFT = 1,
parameter [ 0:0] TWO_CYCLE_COMPARE = 0, parameter [ 0:0] TWO_CYCLE_COMPARE = 0,
parameter [ 0:0] TWO_CYCLE_ALU = 0,
parameter [ 0:0] CATCH_MISALIGN = 1, parameter [ 0:0] CATCH_MISALIGN = 1,
parameter [ 0:0] CATCH_ILLINSN = 1, parameter [ 0:0] CATCH_ILLINSN = 1,
parameter [ 0:0] ENABLE_PCPI = 0, parameter [ 0:0] ENABLE_PCPI = 0,
@ -1283,6 +1354,7 @@ module picorv32_axi #(
.ENABLE_REGS_DUALPORT(ENABLE_REGS_DUALPORT), .ENABLE_REGS_DUALPORT(ENABLE_REGS_DUALPORT),
.TWO_STAGE_SHIFT (TWO_STAGE_SHIFT ), .TWO_STAGE_SHIFT (TWO_STAGE_SHIFT ),
.TWO_CYCLE_COMPARE (TWO_CYCLE_COMPARE ), .TWO_CYCLE_COMPARE (TWO_CYCLE_COMPARE ),
.TWO_CYCLE_ALU (TWO_CYCLE_ALU ),
.CATCH_MISALIGN (CATCH_MISALIGN ), .CATCH_MISALIGN (CATCH_MISALIGN ),
.CATCH_ILLINSN (CATCH_ILLINSN ), .CATCH_ILLINSN (CATCH_ILLINSN ),
.ENABLE_PCPI (ENABLE_PCPI ), .ENABLE_PCPI (ENABLE_PCPI ),