From 21157b8f1dca8f27de0a14cfa852d7a51183fb56 Mon Sep 17 00:00:00 2001 From: Clifford Wolf Date: Sun, 28 Jun 2015 15:41:55 +0200 Subject: [PATCH] Cleanups in PCPI interface --- README.md | 101 +++++++++++++++++++++++++++++++++++++++++++++------ picorv32.v | 105 ++++++++++++++++++++++------------------------------- 2 files changed, 133 insertions(+), 73 deletions(-) diff --git a/README.md b/README.md index 958d3b7..45f220f 100644 --- a/README.md +++ b/README.md @@ -79,8 +79,8 @@ Simply copy this file into your project. #### Makefile and testbench.v -A basic test environment run `make test`, `make test_sp` and/or `make test_axi` to run -the test firmware in different environments. +A basic test environment. Run `make test`, `make test_sp` and/or `make test_axi` to run +the test firmware in different hardware configurations. #### firmware/ @@ -205,13 +205,99 @@ For the Dhrystone benchmark the average CPI is 4.167. PicoRV32 Native Memory Interface -------------------------------- -This section is under construction. +The native memory interface of PicoRV32 is a simple valid-ready interface +that can run one memory transfer at a time: + + output mem_valid + output mem_instr + input mem_ready + + output [31:0] mem_addr + output [31:0] mem_wdata + output [ 3:0] mem_wstrb + input [31:0] mem_rdata + +The core initiates a memory transfer by asserting `mem_valid`. The valid +signal stays high until the peer asserts `mem_ready`. All core outputs +are stable over the `mem_valid` period. + +#### Read Transfer + +In a read transfer `mem_wstrb` has the value 0 and `mem_wdata` is unused. + +The memory reads the address `mem_addr` and makes the read value available on +`mem_rdata` in the cycle `mem_ready` is high. + +There is no need for an external wait cycle. The memory read can be implemented +asynchronously with `mem_ready` going high in the same cycle as `mem_valid`, or +`mem_ready` being tied to constant 1. 
+ +#### Write Transfer + +In a write transfer `mem_wstrb` is not 0 and `mem_rdata` is unused. The memory +writes the data at `mem_wdata` to the address `mem_addr` and acknowledges the +transfer by asserting `mem_ready`. + +There is no need for an external wait cycle. The memory can acknowledge the +write immediately with `mem_ready` going high in the same cycle as +`mem_valid`, or `mem_ready` being tied to constant 1. + +#### Look-Ahead Interface + +The PicoRV32 core also provides a "Look-Ahead Memory Interface" that provides +all information about the next memory transfer one clock cycle earlier than the +normal interface. + + output mem_la_read + output mem_la_write + output [31:0] mem_la_addr + output [31:0] mem_la_wdata + output [ 3:0] mem_la_wstrb + +In the clock cycle before `mem_valid` goes high, this interface will output a +pulse on `mem_la_read` or `mem_la_write` to indicate the start of a read or +write transaction in the next clock cycle. + +*Note: The signals `mem_la_read`, `mem_la_write`, and `mem_la_addr` are driven +by combinatorial circuits within the PicoRV32 core. It might be harder to +achieve timing closure with the look-ahead interface than with the normal +memory interface described above.* Pico Co-Processor Interface (PCPI) ---------------------------------- -This section is under construction. +The Pico Co-Processor Interface (PCPI) can be used to implement non-branching +instructions in external cores: + + output pcpi_valid + output [31:0] pcpi_insn + output [31:0] pcpi_rs1 + output [31:0] pcpi_rs2 + input pcpi_wr + input [31:0] pcpi_rd + input pcpi_wait + input pcpi_ready + +When an unsupported instruction is encountered and the PCPI feature is +activated (see ENABLE_PCPI above), then `pcpi_valid` is asserted, the +instruction word itself is output on `pcpi_insn`, the `rs1` and `rs2` +fields are decoded and the values in those registers are output +on `pcpi_rs1` and `pcpi_rs2`.
+ +An external PCPI core can then decode the instruction, execute it, and assert +`pcpi_ready` when execution of the instruction is finished. Optionally a +result value can be written to `pcpi_rd` and `pcpi_wr` asserted. The +PicoRV32 core will then decode the `rd` field of the instruction and +write the value from `pcpi_rd` to the respective register. + +When no external PCPI core acknowledges the instruction within 16 clock +cycles, then an illegal instruction exception is raised and the respective +interrupt handler is called. A PCPI core that needs more than a couple of +cycles to execute an instruction, should assert `pcpi_wait` as soon as +the instruction has been decoded successfully and keep it asserted until +it asserts `pcpi_ready`. This will prevent the PicoRV32 core from raising +an illegal instruction exception. Custom Instructions for IRQ Handling @@ -408,10 +494,3 @@ enabled PCPI, IRQ and MUL features. *Note: Most of the size reduction in the "small" core comes from eliminating the counter instructions, not from reducing the size of the register file.* - -Todos: ------- - -- Optional support for compressed ISA -- Improved documentation and examples - diff --git a/picorv32.v b/picorv32.v index b09c709..79fbbc5 100644 --- a/picorv32.v +++ b/picorv32.v @@ -57,13 +57,11 @@ module picorv32 #( output reg [ 3:0] mem_la_wstrb, // Pico Co-Processor Interface (PCPI) - output reg pcpi_insn_valid, + output reg pcpi_valid, output reg [31:0] pcpi_insn, - output reg pcpi_rs1_valid, output [31:0] pcpi_rs1, - output reg pcpi_rs2_valid, output [31:0] pcpi_rs2, - input pcpi_rd_valid, + input pcpi_wr, input [31:0] pcpi_rd, input pcpi_wait, input pcpi_ready, @@ -100,40 +98,38 @@ module picorv32 #( // Internal PCPI Cores - wire pcpi_mul_rd_valid; + wire pcpi_mul_wr; wire [31:0] pcpi_mul_rd; wire pcpi_mul_wait; wire pcpi_mul_ready; - reg pcpi_int_rd_valid; + reg pcpi_int_wr; reg [31:0] pcpi_int_rd; reg pcpi_int_wait; reg pcpi_int_ready; generate if (ENABLE_MUL) begin 
picorv32_pcpi_mul pcpi_mul ( - .clk (clk ), - .resetn (resetn ), - .pcpi_insn_valid(pcpi_insn_valid ), - .pcpi_insn (pcpi_insn ), - .pcpi_rs1_valid (pcpi_rs1_valid ), - .pcpi_rs1 (pcpi_rs1 ), - .pcpi_rs2_valid (pcpi_rs2_valid ), - .pcpi_rs2 (pcpi_rs2 ), - .pcpi_rd_valid (pcpi_mul_rd_valid), - .pcpi_rd (pcpi_mul_rd ), - .pcpi_wait (pcpi_mul_wait ), - .pcpi_ready (pcpi_mul_ready ) + .clk (clk ), + .resetn (resetn ), + .pcpi_valid(pcpi_valid ), + .pcpi_insn (pcpi_insn ), + .pcpi_rs1 (pcpi_rs1 ), + .pcpi_rs2 (pcpi_rs2 ), + .pcpi_wr (pcpi_mul_wr ), + .pcpi_rd (pcpi_mul_rd ), + .pcpi_wait (pcpi_mul_wait ), + .pcpi_ready(pcpi_mul_ready ) ); end else begin - assign pcpi_mul_rd_valid = 0; + assign pcpi_mul_wr = 0; assign pcpi_mul_rd = 1'bx; assign pcpi_mul_wait = 0; assign pcpi_mul_ready = 0; end endgenerate always @* begin - pcpi_int_rd_valid = 0; + pcpi_int_wr = 0; pcpi_int_rd = 1'bx; pcpi_int_wait = |{ENABLE_PCPI && pcpi_wait, ENABLE_MUL && pcpi_mul_wait}; pcpi_int_ready = |{ENABLE_PCPI && pcpi_ready, ENABLE_MUL && pcpi_mul_ready}; @@ -141,11 +137,11 @@ module picorv32 #( (* parallel_case *) case (1'b1) ENABLE_PCPI && pcpi_ready: begin - pcpi_int_rd_valid = pcpi_rd_valid; + pcpi_int_wr = pcpi_wr; pcpi_int_rd = pcpi_rd; end ENABLE_MUL && pcpi_mul_ready: begin - pcpi_int_rd_valid = pcpi_mul_rd_valid; + pcpi_int_wr = pcpi_mul_wr; pcpi_int_rd = pcpi_mul_rd; end endcase @@ -570,7 +566,7 @@ module picorv32 #( reg_alu_out <= alu_out; if (WITH_PCPI) begin - if (pcpi_insn_valid && !pcpi_int_wait) begin + if (pcpi_valid && !pcpi_int_wait) begin if (pcpi_timeout_counter) pcpi_timeout_counter <= pcpi_timeout_counter - 1; end else @@ -609,9 +605,7 @@ module picorv32 #( latched_is_lu <= 0; latched_is_lh <= 0; latched_is_lb <= 0; - pcpi_insn_valid <= 0; - pcpi_rs1_valid <= 0; - pcpi_rs2_valid <= 0; + pcpi_valid <= 0; irq_active <= 0; irq_mask <= ~0; next_irq_pending = 0; @@ -711,20 +705,16 @@ module picorv32 #( `endif if (instr_trap) begin if (WITH_PCPI) begin - pcpi_rs1_valid <= 1; - 
pcpi_insn_valid <= 1; reg_op1 <= decoded_rs1 ? cpuregs[decoded_rs1] : 0; if (ENABLE_REGS_DUALPORT) begin - pcpi_rs2_valid <= 1; + pcpi_valid <= 1; reg_sh <= decoded_rs2 ? cpuregs[decoded_rs2] : 0; reg_op2 <= decoded_rs2 ? cpuregs[decoded_rs2] : 0; if (pcpi_int_ready) begin mem_do_rinst <= 1; - pcpi_insn_valid <= 0; - pcpi_rs1_valid <= 0; - pcpi_rs2_valid <= 0; + pcpi_valid <= 0; reg_out <= pcpi_int_rd; - latched_store <= pcpi_int_rd_valid; + latched_store <= pcpi_int_wr; cpu_state <= cpu_state_fetch; end else if (pcpi_timeout) begin @@ -842,15 +832,13 @@ module picorv32 #( `endif reg_sh <= decoded_rs2 ? cpuregs[decoded_rs2] : 0; reg_op2 <= decoded_rs2 ? cpuregs[decoded_rs2] : 0; - if (WITH_PCPI && pcpi_insn_valid) begin - pcpi_rs2_valid <= 1; + if (WITH_PCPI && instr_trap) begin + pcpi_valid <= 1; if (pcpi_int_ready) begin mem_do_rinst <= 1; - pcpi_insn_valid <= 0; - pcpi_rs1_valid <= 0; - pcpi_rs2_valid <= 0; + pcpi_valid <= 0; reg_out <= pcpi_int_rd; - latched_store <= pcpi_int_rd_valid; + latched_store <= pcpi_int_wr; cpu_state <= cpu_state_fetch; end else if (pcpi_timeout) begin @@ -1030,13 +1018,11 @@ module picorv32_pcpi_mul #( ) ( input clk, resetn, - input pcpi_insn_valid, + input pcpi_valid, input [31:0] pcpi_insn, - input pcpi_rs1_valid, input [31:0] pcpi_rs1, - input pcpi_rs2_valid, input [31:0] pcpi_rs2, - output reg pcpi_rd_valid, + output reg pcpi_wr, output reg [31:0] pcpi_rd, output reg pcpi_wait, output reg pcpi_ready @@ -1056,8 +1042,7 @@ module picorv32_pcpi_mul #( instr_mulhsu <= 0; instr_mulhu <= 0; - if (resetn && pcpi_insn_valid && pcpi_rs1_valid && pcpi_rs2_valid && - pcpi_insn[6:0] == 7'b0110011 && pcpi_insn[31:25] == 7'b0000001) begin + if (resetn && pcpi_valid && pcpi_insn[6:0] == 7'b0110011 && pcpi_insn[31:25] == 7'b0000001) begin case (pcpi_insn[14:12]) 3'b000: instr_mul <= 1; 3'b001: instr_mulh <= 1; @@ -1133,12 +1118,12 @@ module picorv32_pcpi_mul #( end always @(posedge clk) begin - pcpi_rd_valid <= 0; + pcpi_wr <= 0; pcpi_ready <= 
0; if (mul_finish) begin - pcpi_rd <= instr_any_mulh ? rd >> 32 : rd; - pcpi_rd_valid <= 1; + pcpi_wr <= 1; pcpi_ready <= 1; + pcpi_rd <= instr_any_mulh ? rd >> 32 : rd; end end endmodule @@ -1187,13 +1172,11 @@ module picorv32_axi #( input [31:0] mem_axi_rdata, // Pico Co-Processor Interface (PCPI) - output pcpi_insn_valid, + output pcpi_valid, output [31:0] pcpi_insn, - output pcpi_rs1_valid, output [31:0] pcpi_rs1, - output pcpi_rs2_valid, output [31:0] pcpi_rs2, - input pcpi_rd_valid, + input pcpi_wr, input [31:0] pcpi_rd, input pcpi_wait, input pcpi_ready, @@ -1262,16 +1245,14 @@ module picorv32_axi #( .mem_ready(mem_ready), .mem_rdata(mem_rdata), - .pcpi_insn_valid(pcpi_insn_valid), - .pcpi_insn (pcpi_insn ), - .pcpi_rs1_valid (pcpi_rs1_valid ), - .pcpi_rs1 (pcpi_rs1 ), - .pcpi_rs2_valid (pcpi_rs2_valid ), - .pcpi_rs2 (pcpi_rs2 ), - .pcpi_rd_valid (pcpi_rd_valid ), - .pcpi_rd (pcpi_rd ), - .pcpi_wait (pcpi_wait ), - .pcpi_ready (pcpi_ready ), + .pcpi_valid(pcpi_valid), + .pcpi_insn (pcpi_insn ), + .pcpi_rs1 (pcpi_rs1 ), + .pcpi_rs2 (pcpi_rs2 ), + .pcpi_wr (pcpi_wr ), + .pcpi_rd (pcpi_rd ), + .pcpi_wait (pcpi_wait ), + .pcpi_ready(pcpi_ready), .irq(irq), .eoi(eoi)