From 37f2ebe675accebd6b869ece692a3cc506a3ccdf Mon Sep 17 00:00:00 2001 From: bunnie Date: Sun, 25 Oct 2020 17:50:56 +0800 Subject: [PATCH 1/6] add responder for type 0 cti, so that wb debug access works --- litex/soc/cores/spi_opi.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/litex/soc/cores/spi_opi.py b/litex/soc/cores/spi_opi.py index c1f9e0f88..e5637f4f3 100644 --- a/litex/soc/cores/spi_opi.py +++ b/litex/soc/cores/spi_opi.py @@ -537,7 +537,7 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): NextValue(rx_fifo_rst, 1) ).Elif(opi_rx_run, NextValue(rx_wren, 1), - If((bus.cyc & bus.stb & ~bus.we) & ((bus.cti == 2) | + If((bus.cyc & bus.stb & ~bus.we) & ((bus.cti == 2) | (bus.cti == 0) | ((bus.cti == 7) & ~bus.ack) ), # handle case of non-pipelined read, ack is late If(~rx_empty, NextValue(bus.dat_r, opi_fifo_rd), @@ -576,7 +576,7 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): tx_almostfull = Signal() self.sync += tx_almostfull.eq(rx_almostfull) # sync the rx_almostfull signal into the local clock domain txphy_bus = Signal() - self.sync += txphy_bus.eq(bus.cyc & bus.stb & ~bus.we & (bus.cti == 2)) + self.sync += txphy_bus.eq(bus.cyc & bus.stb & ~bus.we & ((bus.cti == 2) | (bus.cti == 0))) tx_resetcycle = Signal() self.submodules.txphy = txphy = FSM(reset_state="RESET") @@ -636,7 +636,7 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): ) txphy.act("TX_FILL", If(tx_run, - If(((~txphy_bus & (bus.cyc & bus.stb & ~bus.we & (bus.cti == 2))) & + If(((~txphy_bus & (bus.cyc & bus.stb & ~bus.we & ((bus.cti == 2) | (bus.cti == 0)) )) & (opi_addr[2:] != bus.adr)) | tx_resetcycle, # Tt's a new bus cycle, and the requested address is not equal to the current # read buffer address @@ -705,7 +705,7 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): # - then run the command # - Else wait until a bus cycle, and once it happens, put the system into run mode If(bus.cyc & bus.stb, - If(~bus.we & (bus.cti ==2), + If(~bus.we & ((bus.cti == 2) | (bus.cti == 0)), 
NextState("TX_RUN") ).Else( # Handle other cases here, e.g. what do we do if we get a write? probably @@ -862,7 +862,7 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): NextValue(addr_updated, 1), NextValue(spi_cs_n, 1), # raise CS in anticipation of a new address cycle NextState("SPI_READ_32_CS"), - ).Elif( (rom_addr[2:] == bus.adr) | (~new_cycle & bus.cti == 2), + ).Elif( (rom_addr[2:] == bus.adr) | (~new_cycle & ((bus.cti == 2) | (bus.cti == 0)) ), NextValue(mac_count, 3), # get another beat of 4 bytes at the next address NextState("SPI_READ_32") ).Else( From e8c39ec3d2e4adb8711d61fdd0b4eaed8fe6ad61 Mon Sep 17 00:00:00 2001 From: bunnie Date: Thu, 29 Oct 2020 05:09:18 +0800 Subject: [PATCH 2/6] add generic command processing state machine facilitates page writes and sector erases first commit, debugging now commencing --- litex/soc/cores/spi_opi.py | 163 ++++++++++++++++++++++++++++++++----- 1 file changed, 144 insertions(+), 19 deletions(-) diff --git a/litex/soc/cores/spi_opi.py b/litex/soc/cores/spi_opi.py index e5637f4f3..6c428d58d 100644 --- a/litex/soc/cores/spi_opi.py +++ b/litex/soc/cores/spi_opi.py @@ -5,6 +5,7 @@ # SPDX-License-Identifier: BSD-2-Clause from migen.genlib.cdc import MultiReg +from migen.genlib.fifo import SyncFIFOBuffered from litex.soc.interconnect import wishbone from litex.soc.interconnect.csr_eventmanager import * @@ -413,12 +414,22 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): self.command = CSRStorage(description="Write individual bits to issue special commands to SPI; setting multiple bits at once leads to undefined behavior.", fields=[ CSRField("wakeup", size=1, description="Sequence through init & wakeup routine"), - CSRField("sector_erase", size=1, description="Erase a sector"), + CSRField("exec_cmd", size=1, description="Writing a `1` executes a manual command", pulse=True), + CSRField("cmd_code", size=8, description="Manual command code (first 8 bits, e.g. 
PP4B is 0x12)"), + CSRField("has_arg", size=1, description="When set, transmits the value of `cmd_arg` as the argument to the command"), + # CSRField("write_cmd", size=1, description="When `1`, `data_bytes` are written from page FIFO; when `0`, up to 4 STR `data_bytes` are read into readback CSR"), + CSRField("dummy_cycles", size=5, description="Number of dummy cycles for manual command; 0 implies a write, >0 implies read"), + CSRField("data_bytes", size=8, description="Number of data bytes"), ]) - self.sector = CSRStorage(description="Sector to erase", + self.cmd_arg = CSRStorage(description="Command argument", fields=[ - CSRField("sector", size=32, description="Sector to erase") + CSRField("cmd_arg", size=32, description="Argument to manual command") ]) + self.cmd_rbk_data = CSRStatus(description = "Readback data from commands", + fields=[ + CSRField("cmd_rbk_data", size=32, description="Data read back from a cmd_code that has `write_code` set to 0"), + ] + ) self.status = CSRStatus(description="Interface status", fields=[ CSRField("wip", size=1, description="Operation in progress (write or erease)") @@ -520,16 +531,34 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): self.sync.dqs += opi_di.eq(self.di) self.comb += opi_fifo_wd.eq(Cat(opi_di, self.di)) self.sync += rx_fifo_rst_pipe.eq(rx_fifo_rst) # add one pipe register to help relax this timing path. It is critical so it must be timed, but one extra cycle is OK. 
+ rbk_data = Signal(32) + self.sync += rbk_data.eq(opi_fifo_wd) # buffer for capture to CSR on command cycles + bus_ack_r = Signal() + bus_ack_w = Signal() + + #--------- Page write data responder ----------------------- + self.submodules.txwr_fifo = SyncFIFOBuffered(width=16, depth=256) + got_wb_wr = Signal() + got_wb_wr_r = Signal() + self.comb += [ + self.txwr_fifo.din.eq(bus.dat_r[:16]), # lower 16 bits only used + got_wb_wr.eq(bus.cyc & bus.stb & bus.we), + bus.ack.eq(bus_ack_r | bus_ack_w), + ] + self.sync += [ + got_wb_wr_r.eq(got_wb_wr), + self.txwr_fifo.we.eq(got_wb_wr & ~got_wb_wr_r), + bus_ack_w.eq(got_wb_wr), + ] #--------- OPI Rx Phy machine ------------------------------ self.submodules.rxphy = rxphy = FSM(reset_state="IDLE") - cti_pipe = Signal(3) rxphy_cnt = Signal(3) rxphy.act("IDLE", If(spi_mode, NextState("IDLE"), ).Else( - NextValue(bus.ack, 0), + NextValue(bus_ack_r, 0), If(opi_reset_rx_req, NextState("WAIT_RESET"), NextValue(rxphy_cnt, 6), @@ -543,7 +572,7 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): NextValue(bus.dat_r, opi_fifo_rd), rx_rden.eq(1), NextValue(opi_addr, opi_addr + 4), - NextValue(bus.ack, 1) + NextValue(bus_ack_r, 1) ) ) ) @@ -569,6 +598,7 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): txcmd_clken = Signal() txphy_oe = Signal() txcmd_oe = Signal() + txwr_cnt = Signal(8) self.sync += opi_cs_n.eq( (tx_run & txphy_cs_n) | (~tx_run & txcmd_cs_n) ) self.comb += If( tx_run, self.do.eq(txphy_do) ).Else( self.do.eq(txcmd_do) ) self.comb += opi_clk_en.eq( (tx_run & txphy_clken) | (~tx_run & txcmd_clken) ) @@ -579,15 +609,30 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): self.sync += txphy_bus.eq(bus.cyc & bus.stb & ~bus.we & ((bus.cti == 2) | (bus.cti == 0))) tx_resetcycle = Signal() + cmd_req = Signal() + cmd_ack = Signal() + self.sync += [ + If(self.command.fields.exec_cmd, + cmd_req.eq(1), + ).Elif(cmd_ack, + cmd_req.eq(0), + ).Else( + cmd_req.eq(cmd_req) + ) + ] + cmd_run = Signal() + cmd_done = Signal() + self.submodules.txphy = txphy 
= FSM(reset_state="RESET") txphy.act("RESET", NextValue(opi_rx_run, 0), NextValue(txphy_oe, 0), NextValue(txphy_cs_n, 1), NextValue(txphy_clken, 0), + NextValue(cmd_done, 0), # guarantee that the first state we go to out of reset is a four-cycle burst NextValue(txphy_cnt, 4), - If(tx_run & ~spi_mode, + If( (tx_run | cmd_run) & ~spi_mode, NextState("TX_SETUP") ) ) @@ -603,7 +648,11 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): ) ) txphy.act("TX_CMD_CS_DELAY", # meet setup timing for CS-to-clock - NextState("TX_CMD") + If( tx_run, + NextState("TX_CMD") + ).Elif( cmd_run, + NextState("TX_MAN_CMD") + ) ) txphy.act("TX_CMD", NextValue(txphy_do, 0xEE11), @@ -672,6 +721,82 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): NextValue(txphy_clken, 1), ) ) + txphy.act("TX_MAN_CMD", + NextValue(txphy_do, Cat(~self.command.fields.cmd_code, self.command.fields.cmd_code)), + NextValue(txphy_clken, 1), + If(self.command.fields.has_arg, + NextState("TX_ARGHI") + ).Elif(self.command.fields.dummy_cycles > 0, # implies a read + NextValue(txphy_cnt, self.command.fields.dummy_cycles - 1), + NextState("TX_MAN_DUMMY") + ).Elif(self.command.fields.data_bytes > 0, # write is implied if dummy cycles is 0 + NextValue(txwr_cnt, self.command.fields.data_bytes), + NextState("TX_WRDATA") + ).Else( # simple command with no data or readback + NextState("RESET"), + NextValue(cmd_done, 1), + ) + ) + txphy.act("TX_ARGHI", + NextValue(txphy_do, self.cmd_arg.fields.cmd_arg[16:]), + NextState("TX_ARGLO") + ) + txphy.act("TX_ARGLO", + NextValue(txphy_do, self.cmd_arg.fields.cmd_arg[:16]), + If(self.command.fields.dummy_cycles > 0, + NextValue(txphy_cnt, self.command.fields.dummy_cycles - 1), + NextState("TX_MAN_DUMMY") + ).Else(# self.command.fields.write_cmd, # write is implied if dummy cycles is 0 + NextValue(txwr_cnt, self.command.fields.data_bytes), + NextState("TX_WRDATA") + ) + ) + txphy.act("TX_MAN_DUMMY", + NextValue(txphy_oe, 0), + NextValue(txphy_do, 0), + NextValue(txphy_cnt, txphy_cnt - 1), + 
If(txphy_cnt == 0, + NextValue(opi_rx_run, 1), + # always a readback after a dummy cycle + NextValue(txphy_cnt, self.command.fields.data_bytes[:4] - 1), # ignore upper bits + NextState("TX_MAN_RBK"), + ) + ) + txphy.act("TX_MAN_RBK", + If(txphy_cnt == 0, + NextValue(txphy_clken, 1), + NextValue(opi_reset_rx_req, 1), + NextState("TX_RESET_RX"), + NextValue(self.cmd_rbk_data.fields.cmd_rbk_data, rbk_data), + NextValue(cmd_done, 1), # done with readback + ).Else( + NextValue(txphy_cnt, txphy_cnt - 1), + ) + ) + txphy.act("TX_WRDATA", + If(txwr_cnt == 0, + NextState("TX_WR_RESET"), + ).Else( + NextValue(txwr_cnt, txwr_cnt - 1), + NextValue(txphy_do, self.txwr_fifo.dout), + self.txwr_fifo.re.eq(1), + ) + ) + txphy.act("TX_WR_RESET", + NextValue(opi_rx_run, 0), + NextValue(txphy_oe, 0), + NextValue(txphy_cs_n, 1), + NextValue(txphy_clken, 0), + NextValue(cmd_done, 0), + # drain any excess values in the page FIFO + If(self.txwr_fifo.readable, + self.txwr_fifo.re.eq(1), + ).Else( + NextState("RESET"), + NextValue(cmd_done, 1), + ) + ) + #--------- OPI CMD machine ------------------------------ self.submodules.opicmd = opicmd = FSM(reset_state="RESET") @@ -679,6 +804,7 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): NextValue(txcmd_do, 0), NextValue(txcmd_oe, 0), NextValue(tx_run, 0), + NextValue(cmd_run, 0), NextValue(txcmd_cs_n, 1), If(~spi_mode, NextState("IDLE") @@ -711,14 +837,14 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): # Handle other cases here, e.g. what do we do if we get a write? probably # should just ACK it without doing anything so the CPU doesn't freeze... 
) - ).Elif(self.command.re, + ).Elif(cmd_req, NextState("DISPATCH_CMD"), ) ) ) opicmd.act("TX_RUN", NextValue(tx_run, 1), - If(self.command.re, # Respond to commands + If(cmd_req, # Respond to commands NextState("WAIT_DISPATCH") ) ) @@ -730,15 +856,14 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): ) ) opicmd.act("DISPATCH_CMD", - If(self.command.fields.sector_erase, - NextState("DO_SECTOR_ERASE") + cmd_ack.eq(1), # clear the command dispatch pulse cache + If(cmd_done, + NextValue(cmd_run, 0), + NextState("TX_RUN"), ).Else( - NextState("IDLE") + NextValue(cmd_run, 1), ) ) - opicmd.act("DO_SECTOR_ERASE", - # Placeholder - ) # MAC/PHY abstraction for the SPI machine spi_req = Signal() @@ -850,12 +975,12 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): NextValue(mac_count, 0), NextState("WAKEUP_PRE"), NextValue(new_cycle, 1), - If(spi_mode, NextValue(bus.ack, 0)), + If(spi_mode, NextValue(bus_ack_r, 0)), ) if spiread: mac.act("IDLE", If(spi_mode, # This machine stays in idle once spi_mode is dropped - NextValue(bus.ack, 0), + NextValue(bus_ack_r, 0), If((bus.cyc == 1) & (bus.stb == 1) & (bus.we == 0) & (bus.cti != 7), # read cycle requested, not end-of-burst If( (rom_addr[2:] != bus.adr) & new_cycle, NextValue(rom_addr, Cat(Signal(2, reset=0), bus.adr)), @@ -1064,7 +1189,7 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): # handle otherwise implicit dual-controller situation If(spi_mode, NextValue(bus.dat_r, Cat(d_to_wb[8:],spi_di)), - NextValue(bus.ack, 1), + NextValue(bus_ack_r, 1), ), NextValue(rom_addr, rom_addr + 1), NextState("IDLE") From 6e806ce60cf67ce98041aa3fb198ba156485eb97 Mon Sep 17 00:00:00 2001 From: bunnie Date: Wed, 4 Nov 2020 04:39:47 +0800 Subject: [PATCH 3/6] refactor SPI DOPI interface to support arbitrary commands, not just reads lays the groundwork for doing page programming and sector erasing --- litex/soc/cores/spi_opi.py | 389 +++++++++++++++++++++++++------------ 1 file changed, 263 insertions(+), 126 deletions(-) diff --git a/litex/soc/cores/spi_opi.py
b/litex/soc/cores/spi_opi.py index 6c428d58d..b441322a8 100644 --- a/litex/soc/cores/spi_opi.py +++ b/litex/soc/cores/spi_opi.py @@ -14,14 +14,68 @@ from litex.soc.integration.doc import AutoDoc, ModuleDoc class S7SPIOPI(Module, AutoCSR, AutoDoc): - def __init__(self, pads, - dq_delay_taps = 31, + def __init__(self, platform, padgroup_name, + dq_delay_taps = 0, sclk_name = "SCLK_ODDR", iddr_name = "SPI_IDDR", cipo_name = "CIPO_FDRE", sim = False, spiread = False, prefetch_lines = 1): + + pads = platform.request(padgroup_name) + self.dq = dq = TSTriple(7) # dq[0] is special because it is also copi + self.dq_copi = dq_copi = TSTriple(1) # this has similar structure but an independent "oe" signal + + # reminder to self: the {{ and }} overloading is because Python treats these as special in strings, so {{ -> { in actual constraint + # NOTE: ECSn is deliberately not constrained -- it's more or less async (0-10ns delay on the signal, only meant to line up with "block" region + + # constrain DQS-to-DQ input DDR delays + platform.add_platform_command("create_clock -name spidqs -period 10 [get_ports {}_dqs]".format(padgroup_name)) + platform.add_platform_command("set_input_delay -clock spidqs -max 0.6 [get_ports {{" + padgroup_name + "_dq[*]}}]") + platform.add_platform_command("set_input_delay -clock spidqs -min 4.4 [get_ports {{" + padgroup_name + "_dq[*]}}]") + platform.add_platform_command( + "set_input_delay -clock spidqs -max 0.6 [get_ports {{" + padgroup_name + "_dq[*]}}] -clock_fall -add_delay") + platform.add_platform_command( + "set_input_delay -clock spidqs -min 4.4 [get_ports {{" + padgroup_name + "_dq[*]}}] -clock_fall -add_delay") + + # derive clock for SCLK - clock-forwarded from DDR see Xilinx answer 62488 use case #4 + platform.add_platform_command( + "create_generated_clock -name spiclk_out -multiply_by 1 -source [get_pins {}/Q] [get_ports {}_sclk]".format( + sclk_name, padgroup_name)) + + # constrain CIPO SDR delay -- WARNING: -max is 'actually' 5.0ns, 
but design can't meet timing @ 5.0 tPD from SPIROM. There is some margin in the timing closure tho, so 4.5ns is probably going to work.... + platform.add_platform_command( + "set_input_delay -clock [get_clocks spiclk_out] -clock_fall -max 4.5 [get_ports {}_dq[1]]".format(padgroup_name)) + platform.add_platform_command( + "set_input_delay -clock [get_clocks spiclk_out] -clock_fall -min 1 [get_ports {}_dq[1]]".format(padgroup_name)) + # corresponding false path on CIPO DDR input when clocking SDR data + platform.add_platform_command( + "set_false_path -from [get_clocks spiclk_out] -to [get_pin {}/D ]".format(iddr_name + "1")) + # corresponding false path on CIPO SDR input from DQS strobe, only if the cipo path is used + if spiread: + platform.add_platform_command( + "set_false_path -from [get_clocks spidqs] -to [get_pin {}/D ]".format(cipo_name)) + + # constrain CLK-to-DQ output DDR delays; copi uses the same rules + platform.add_platform_command( + "set_output_delay -clock [get_clocks spiclk_out] -max 1 [get_ports {{" + padgroup_name + "_dq[*]}}]") + platform.add_platform_command( + "set_output_delay -clock [get_clocks spiclk_out] -min -1 [get_ports {{" + padgroup_name + "_dq[*]}}]") + platform.add_platform_command( + "set_output_delay -clock [get_clocks spiclk_out] -max 1 [get_ports {{" + padgroup_name + "_dq[*]}}] -clock_fall -add_delay") + platform.add_platform_command( + "set_output_delay -clock [get_clocks spiclk_out] -min -1 [get_ports {{" + padgroup_name + "_dq[*]}}] -clock_fall -add_delay") + # constrain CLK-to-CS output delay. NOTE: timings require one dummy cycle insertion between CS and SCLK (de)activations. 
Not possible to meet timing for DQ & single-cycle CS due to longer tS/tH reqs for CS + platform.add_platform_command( + "set_output_delay -clock [get_clocks spiclk_out] -min -1 [get_ports {}_cs_n]".format(padgroup_name)) # -3 in reality + platform.add_platform_command( + "set_output_delay -clock [get_clocks spiclk_out] -max 1 [get_ports {}_cs_n]".format(padgroup_name)) # 4.5 in reality + # unconstrain OE path - we have like 10+ dummy cycles to turn the bus on wr->rd, and 2+ cycles to turn on end of read + platform.add_platform_command("set_false_path -through [ get_pins {net}_reg/Q ]", net=dq.oe) + platform.add_platform_command("set_false_path -through [ get_pins {net}_reg/Q ]", + net=dq_copi.oe) + self.intro = ModuleDoc("""Intro SpiOpi implements a dual-mode SPI or OPI interface. OPI is an octal (8-bit) wide variant of @@ -78,8 +132,16 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): dq_delay_taps probably doesn't need to be adjusted; it can be tweaked for timing closure. The delays can also be adjusted at runtime. """) - if prefetch_lines > 63: - prefetch_lines = 63 + + if sim == False: + idelay_name = "IDELAYE2" + bufr_name = "BUFG" # we actually want a slightly slower buffer here... 
+ else: + idelay_name = "IDELAYE2_SIM" + bufr_name = "BUFR_SIM" + + if prefetch_lines > 62: + prefetch_lines = 62 self.spi_mode = spi_mode = Signal(reset=1) # When reset is asserted, force into spi mode cs_n = Signal(reset=1) # Make sure CS is sane on reset, too @@ -95,7 +157,7 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): self.clock_domains.cd_dqs = ClockDomain(reset_less=True) self.comb += self.cd_dqs.clk.eq(dqs_iobuf) self.specials += [ - Instance("BUFR", i_I=pads.dqs, o_O=dqs_iobuf), + Instance(bufr_name, i_I=pads.dqs, o_O=dqs_iobuf), ] # DQ connections ------------------------------------------------------------------------- @@ -109,14 +171,21 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): # Delay programming API self.delay_config = CSRStorage(fields=[ - CSRField("d", size=5, description="Delay amount; each increment is 78ps", reset=31), + CSRField("d", size=5, description="Delay amount; each increment is 78ps", reset=dq_delay_taps), CSRField("load", size=1, description="Force delay taps to delay_d"), ]) self.delay_status = CSRStatus(fields=[ CSRField("q", size=5, description="Readback of current delay amount, useful if inc/ce is used to set"), ]) self.delay_update = Signal() - self.hw_delay_load = Signal() + self.hw_delay_load = Signal(reset=1) # latch in the initial value on reset + reset_counter = Signal(4, reset=15) + self.sync += \ + If(reset_counter != 0, + reset_counter.eq(reset_counter - 1) + ).Else( + self.hw_delay_load.eq(0) + ) self.sync += self.delay_update.eq(self.hw_delay_load | self.delay_config.fields.load) # Break system API into rising/falling edge samples @@ -129,7 +198,6 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): self.comb += self.di.eq(Cat(di_fall, di_rise)) # OPI DDR registers - self.dq = dq = TSTriple(7) # dq[0] is special because it is also copi dq_delayed = Signal(8) self.specials += dq.get_tristate(pads.dq[1:]) for i in range(1, 8): @@ -143,53 +211,53 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): i_D2 = do_fall[i], o_Q = dq.o[i-1], ) - 
if sim == False: - if i == 1: # Only wire up o_CNTVALUEOUT for one instance - self.specials += Instance("IDELAYE2", - p_DELAY_SRC = "IDATAIN", - p_SIGNAL_PATTERN = "DATA", - p_CINVCTRL_SEL = "FALSE", - p_HIGH_PERFORMANCE_MODE = "FALSE", - p_REFCLK_FREQUENCY = 200.0, - p_PIPE_SEL = "FALSE", - p_IDELAY_VALUE = dq_delay_taps, - p_IDELAY_TYPE = delay_type, + if i == 1: # Only wire up o_CNTVALUEOUT for one instance + self.specials += Instance(idelay_name, + p_DELAY_SRC = "IDATAIN", + p_SIGNAL_PATTERN = "DATA", + p_CINVCTRL_SEL = "FALSE", + p_HIGH_PERFORMANCE_MODE = "FALSE", + p_REFCLK_FREQUENCY = 200.0, + p_PIPE_SEL = "FALSE", + p_IDELAY_VALUE = dq_delay_taps, + p_IDELAY_TYPE = delay_type, + + i_C = ClockSignal(), + i_CINVCTRL = 0, + i_REGRST = 0, + i_LDPIPEEN = 0, + i_INC = 0, + i_CE = 0, + i_LD = self.delay_update, + i_CNTVALUEIN = self.delay_config.fields.d, + o_CNTVALUEOUT = self.delay_status.fields.q, + i_IDATAIN = dq.i[i-1], + o_DATAOUT = dq_delayed[i], + i_DATAIN=0, + ), + else: # Don't wire up o_CNTVALUEOUT for others + self.specials += Instance(idelay_name, + p_DELAY_SRC = "IDATAIN", + p_SIGNAL_PATTERN = "DATA", + p_CINVCTRL_SEL = "FALSE", + p_HIGH_PERFORMANCE_MODE = "FALSE", + p_REFCLK_FREQUENCY = 200.0, + p_PIPE_SEL = "FALSE", + p_IDELAY_VALUE = dq_delay_taps, + p_IDELAY_TYPE = delay_type, + i_C = ClockSignal(), + i_CINVCTRL = 0, + i_REGRST = 0, + i_LDPIPEEN = 0 , + i_INC = 0, + i_CE = 0, + i_LD = self.delay_update, + i_CNTVALUEIN = self.delay_config.fields.d, + i_IDATAIN = dq.i[i-1], + o_DATAOUT = dq_delayed[i], + i_DATAIN=0, + ), - i_C = ClockSignal(), - i_CINVCTRL = 0, - i_REGRST = 0, - i_LDPIPEEN = 0, - i_INC = 0, - i_CE = 0, - i_LD = self.delay_update, - i_CNTVALUEIN = self.delay_config.fields.d, - o_CNTVALUEOUT = self.delay_status.fields.q, - i_IDATAIN = dq.i[i-1], - o_DATAOUT = dq_delayed[i], - ), - else: # Don't wire up o_CNTVALUEOUT for others - self.specials += Instance("IDELAYE2", - p_DELAY_SRC = "IDATAIN", - p_SIGNAL_PATTERN = "DATA", - 
p_CINVCTRL_SEL = "FALSE", - p_HIGH_PERFORMANCE_MODE = "FALSE", - p_REFCLK_FREQUENCY = 200.0, - p_PIPE_SEL = "FALSE", - p_IDELAY_VALUE = dq_delay_taps, - p_IDELAY_TYPE = delay_type, - i_C = ClockSignal(), - i_CINVCTRL = 0, - i_REGRST = 0, - i_LDPIPEEN = 0 , - i_INC = 0, - i_CE = 0, - i_LD = self.delay_update, - i_CNTVALUEIN = self.delay_config.fields.d, - i_IDATAIN = dq.i[i-1], - o_DATAOUT = dq_delayed[i], - ), - else: - self.comb += dq_delayed[i].eq(dq.i[i-1]) self.specials += Instance("IDDR", name="{}{}".format(iddr_name, str(i)), p_DDR_CLK_EDGE = "SAME_EDGE_PIPELINED", i_C = dqs_iobuf, @@ -212,7 +280,6 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): ] # bit 0 (copi) is special-cased to handle SPI mode - self.dq_copi = dq_copi = TSTriple(1) # this has similar structure but an independent "oe" signal self.specials += dq_copi.get_tristate(pads.dq[0]) do_mux_rise = Signal() # mux signal for copi/dq select of bit 0 do_mux_fall = Signal() @@ -238,30 +305,28 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): i_D = dq_delayed[0], ), ] - if sim == False: - self.specials += Instance("IDELAYE2", - p_DELAY_SRC = "IDATAIN", - p_SIGNAL_PATTERN = "DATA", - p_CINVCTRL_SEL = "FALSE", - p_HIGH_PERFORMANCE_MODE = "FALSE", - p_REFCLK_FREQUENCY = 200.0, - p_PIPE_SEL = "FALSE", - p_IDELAY_VALUE = dq_delay_taps, - p_IDELAY_TYPE = delay_type, + self.specials += Instance(idelay_name, + p_DELAY_SRC = "IDATAIN", + p_SIGNAL_PATTERN = "DATA", + p_CINVCTRL_SEL = "FALSE", + p_HIGH_PERFORMANCE_MODE = "FALSE", + p_REFCLK_FREQUENCY = 200.0, + p_PIPE_SEL = "FALSE", + p_IDELAY_VALUE = dq_delay_taps, + p_IDELAY_TYPE = delay_type, - i_C = ClockSignal(), - i_CINVCTRL = 0, - i_REGRST = 0, - i_LDPIPEEN = 0, - i_INC = 0, - i_CE = 0, - i_LD = self.delay_update, - i_CNTVALUEIN = self.delay_config.fields.d, - i_IDATAIN = dq_copi.i, - o_DATAOUT = dq_delayed[0], - ), - else: - self.comb += dq_delayed[0].eq(dq_copi.i) + i_C = ClockSignal(), + i_CINVCTRL = 0, + i_REGRST = 0, + i_LDPIPEEN = 0, + i_INC = 0, + i_CE = 0, + 
i_LD = self.delay_update, + i_CNTVALUEIN = self.delay_config.fields.d, + i_IDATAIN = dq_copi.i, + o_DATAOUT = dq_delayed[0], + i_DATAIN=0, + ), # Wire up SCLK interface clk_en = Signal() @@ -414,12 +479,13 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): self.command = CSRStorage(description="Write individual bits to issue special commands to SPI; setting multiple bits at once leads to undefined behavior.", fields=[ CSRField("wakeup", size=1, description="Sequence through init & wakeup routine"), - CSRField("exec_cmd", size=1, description="Writing a `1` executes a manual command", pulse=True), - CSRField("cmd_code", size=8, description="Manual command code (first 8 bits, e.g. PP4B is 0x12)"), - CSRField("has_arg", size=1, description="When set, transmits the value of `cmd_arg` as the argument to the command"), + CSRField("exec_cmd", size=1, description="Writing a `1` executes a manual command", pulse=True), + CSRField("cmd_code", size=8, description="Manual command code (first 8 bits, e.g. PP4B is 0x12)"), + CSRField("has_arg", size=1, description="When set, transmits the value of `cmd_arg` as the argument to the command"), # CSRField("write_cmd", size=1, description="When `1`, `data_bytes` are written from page FIFO; when `0`, up to 4 STR `data_bytes` are read into readback CSR"), CSRField("dummy_cycles", size=5, description="Number of dummy cycles for manual command; 0 implies a write, >0 implies read"), - CSRField("data_bytes", size=8, description="Number of data bytes"), + CSRField("data_words", size=7, description="Number of data words (2x bytes)"), + CSRField("lock_reads", size=1, description="When set, locks out read operations (recommended when doing programming)"), ]) self.cmd_arg = CSRStorage(description="Command argument", fields=[ @@ -434,6 +500,11 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): fields=[ CSRField("wip", size=1, description="Operation in progress (write or erease)") ]) + self.wdata = CSRStorage(description="Page data to write to FLASH", + 
fields = [ + CSRField("wdata", size=16, description="16-bit wide write data presented to FLASH, committed to a 128-entry deep FIFO") + ] + ) # TODO: implement ECC detailed register readback, CRC checking # PHY machine mux -------------------------------------------------------------------------- @@ -484,6 +555,7 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): wrendiv = Signal() wrendiv2 = Signal() rx_fifo_rst_pipe = Signal() + cmd_run = Signal() self.specials += [ # This next pair of async-clear flip flops creates a write-enable gate that (a) ignores # the first two DQS strobes (as they are pipe-filling) and (b) alternates with the correct @@ -493,14 +565,14 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): i_D = ~wrendiv, o_Q = wrendiv, i_CE = 1, - i_CLR = ~rx_wren, + i_CLR = ~(rx_wren & ~cmd_run), ), Instance("FDCE", name="FDCE_WREN", i_C = dqs_iobuf, i_D = ~wrendiv2, o_Q = wrendiv2, i_CE = wrendiv & ~wrendiv2, - i_CLR = ~rx_wren, + i_CLR = ~(rx_wren & ~cmd_run), ), # Direct FIFO primitive is more resource-efficient and faster than migen primitive. Instance("FIFO_DUALCLOCK_MACRO", @@ -509,7 +581,7 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): p_DATA_WIDTH = 32, p_FIRST_WORD_FALL_THROUGH = "TRUE", p_ALMOST_EMPTY_OFFSET = 6, - p_ALMOST_FULL_OFFSET = (511 - (8*prefetch_lines)), + p_ALMOST_FULL_OFFSET = (511 - (8*prefetch_lines + 8)), # a few extra entries needed to meet DRC... 
o_ALMOSTEMPTY = rx_almostempty, o_ALMOSTFULL = rx_almostfull, @@ -537,7 +609,8 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): bus_ack_w = Signal() #--------- Page write data responder ----------------------- - self.submodules.txwr_fifo = SyncFIFOBuffered(width=16, depth=256) + self.submodules.txwr_fifo = SyncFIFOBuffered(width=16, depth=128) + """ got_wb_wr = Signal() got_wb_wr_r = Signal() self.comb += [ @@ -549,6 +622,11 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): got_wb_wr_r.eq(got_wb_wr), self.txwr_fifo.we.eq(got_wb_wr & ~got_wb_wr_r), bus_ack_w.eq(got_wb_wr), + ]""" + self.comb += bus.ack.eq(bus_ack_r) + self.sync += [ + self.txwr_fifo.we.eq(self.wdata.re), + self.txwr_fifo.din.eq(self.wdata.fields.wdata), ] #--------- OPI Rx Phy machine ------------------------------ @@ -590,25 +668,7 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): # TxPHY machine: OPI ------------------------------------------------------------------------- - txphy_cnt = Signal(4) - tx_run = Signal() - txphy_cs_n = Signal(reset=1) - txcmd_cs_n = Signal(reset=1) - txphy_clken = Signal() - txcmd_clken = Signal() - txphy_oe = Signal() - txcmd_oe = Signal() - txwr_cnt = Signal(8) - self.sync += opi_cs_n.eq( (tx_run & txphy_cs_n) | (~tx_run & txcmd_cs_n) ) - self.comb += If( tx_run, self.do.eq(txphy_do) ).Else( self.do.eq(txcmd_do) ) - self.comb += opi_clk_en.eq( (tx_run & txphy_clken) | (~tx_run & txcmd_clken) ) - self.comb += self.tx.eq( (tx_run & txphy_oe) | (~tx_run & txcmd_oe) ) - tx_almostfull = Signal() - self.sync += tx_almostfull.eq(rx_almostfull) # sync the rx_almostfull signal into the local clock domain - txphy_bus = Signal() - self.sync += txphy_bus.eq(bus.cyc & bus.stb & ~bus.we & ((bus.cti == 2) | (bus.cti == 0))) - tx_resetcycle = Signal() - + run_is_hot = Signal() # indicates that the receive FIFO is hot and needs a reset before going into a cmd cmd_req = Signal() cmd_ack = Signal() self.sync += [ @@ -620,8 +680,39 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): cmd_req.eq(cmd_req) ) ] 
- cmd_run = Signal() cmd_done = Signal() + wip_state = Signal() + self.comb += self.status.fields.wip.eq(wip_state | cmd_req) # need combinational loop-back to repsond to fast WIP inquiries + self.sync += [ + If(cmd_done, + wip_state.eq(0), + ).Elif(cmd_run | cmd_req | cmd_done, # lock out writing through the entire life cycle + wip_state.eq(1) + ).Else( + wip_state.eq(wip_state) + ) + ] + + txphy_cnt = Signal(4) + tx_run = Signal() + txphy_cs_n = Signal(reset=1) + txcmd_cs_n = Signal(reset=1) + txphy_clken = Signal() + txcmd_clken = Signal() + txphy_oe = Signal() + txcmd_oe = Signal() + txwr_cnt = Signal(8) + tx_run_d = Signal() + self.sync += tx_run_d.eq(tx_run) + self.sync += opi_cs_n.eq( (tx_run_d & txphy_cs_n) | (~tx_run_d & ~cmd_run & txcmd_cs_n) | (cmd_run & txphy_cs_n) ) + self.comb += If( tx_run | cmd_run, self.do.eq(txphy_do) ).Else( self.do.eq(txcmd_do) ) + self.comb += opi_clk_en.eq( (tx_run & txphy_clken) | (~tx_run & txcmd_clken) | (cmd_run & txphy_clken) ) + self.comb += self.tx.eq( (tx_run & txphy_oe) | (~tx_run & txcmd_oe) | (cmd_run & txphy_oe) ) + tx_almostfull = Signal() + self.sync += tx_almostfull.eq(rx_almostfull) # sync the rx_almostfull signal into the local clock domain + txphy_bus = Signal() + self.sync += txphy_bus.eq(bus.cyc & bus.stb & ~bus.we & ((bus.cti == 2) | (bus.cti == 0))) + tx_resetcycle = Signal() self.submodules.txphy = txphy = FSM(reset_state="RESET") txphy.act("RESET", @@ -632,8 +723,17 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): NextValue(cmd_done, 0), # guarantee that the first state we go to out of reset is a four-cycle burst NextValue(txphy_cnt, 4), - If( (tx_run | cmd_run) & ~spi_mode, + If( tx_run & ~spi_mode, NextState("TX_SETUP") + ).Elif( cmd_run & ~spi_mode & ~cmd_done, # have to look at cmd_done because of delay from done-to-clear of run + If(run_is_hot, + NextValue(txphy_clken, 1), + NextValue(opi_reset_rx_req, 1), + NextValue(txphy_cs_n, 0), + NextState("TX_RESET_BEFORE_CMD"), + ).Else( + 
NextState("TX_SETUP_CMD") + ) ) ) txphy.act("TX_SETUP", @@ -650,8 +750,6 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): txphy.act("TX_CMD_CS_DELAY", # meet setup timing for CS-to-clock If( tx_run, NextState("TX_CMD") - ).Elif( cmd_run, - NextState("TX_MAN_CMD") ) ) txphy.act("TX_CMD", @@ -721,6 +819,31 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): NextValue(txphy_clken, 1), ) ) + # issue a Rx FIFO reset before going into command mode + txphy.act("TX_RESET_BEFORE_CMD", + NextValue(txphy_clken, 1), + NextValue(opi_reset_rx_req, 0), + If(opi_reset_rx_ack, + NextValue(txphy_clken, 0), + NextState("TX_SETUP_CMD") + ) + ) + # mirror setup here because once we count down the delay, it must be atomic to this FSM path + # and we need the full 40ns of CS delay every time we go down this path! + txphy.act("TX_SETUP_CMD", + NextValue(opi_rx_run, 0), + NextValue(txphy_cnt, txphy_cnt - 1), + If( txphy_cnt > 0, + NextValue(txphy_cs_n, 1) + ).Else( + NextValue(txphy_cs_n, 0), + NextValue(txphy_oe, 1), + NextState("TX_CMD_MAN_CS_DELAY") + ) + ) + txphy.act("TX_CMD_MAN_CS_DELAY", + NextState("TX_MAN_CMD") + ), txphy.act("TX_MAN_CMD", NextValue(txphy_do, Cat(~self.command.fields.cmd_code, self.command.fields.cmd_code)), NextValue(txphy_clken, 1), @@ -729,12 +852,11 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): ).Elif(self.command.fields.dummy_cycles > 0, # implies a read NextValue(txphy_cnt, self.command.fields.dummy_cycles - 1), NextState("TX_MAN_DUMMY") - ).Elif(self.command.fields.data_bytes > 0, # write is implied if dummy cycles is 0 - NextValue(txwr_cnt, self.command.fields.data_bytes), + ).Elif(self.command.fields.data_words > 0, # write is implied if dummy cycles is 0 + NextValue(txwr_cnt, self.command.fields.data_words), NextState("TX_WRDATA") ).Else( # simple command with no data or readback - NextState("RESET"), - NextValue(cmd_done, 1), + NextState("TX_WR_RESET"), ) ) txphy.act("TX_ARGHI", @@ -747,7 +869,7 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): NextValue(txphy_cnt, 
self.command.fields.dummy_cycles - 1), NextState("TX_MAN_DUMMY") ).Else(# self.command.fields.write_cmd, # write is implied if dummy cycles is 0 - NextValue(txwr_cnt, self.command.fields.data_bytes), + NextValue(txwr_cnt, self.command.fields.data_words - 1), NextState("TX_WRDATA") ) ) @@ -756,19 +878,25 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): NextValue(txphy_do, 0), NextValue(txphy_cnt, txphy_cnt - 1), If(txphy_cnt == 0, - NextValue(opi_rx_run, 1), # always a readback after a dummy cycle - NextValue(txphy_cnt, self.command.fields.data_bytes[:4] - 1), # ignore upper bits + # ignore upper bits, and note that +1 cycle is added because we have to pump DQS a dummy cycle to push data through the rbk pipe + # the SEEPROM mostly handles the extra pump OK. + NextValue(txphy_cnt, self.command.fields.data_words[:4] - 1 + 1), NextState("TX_MAN_RBK"), ) ) txphy.act("TX_MAN_RBK", If(txphy_cnt == 0, - NextValue(txphy_clken, 1), - NextValue(opi_reset_rx_req, 1), - NextState("TX_RESET_RX"), + NextState("TX_MAN_RBK_WAIT"), + NextValue(txphy_cnt, 4), + ).Else( + NextValue(txphy_cnt, txphy_cnt - 1), + ) + ) + txphy.act("TX_MAN_RBK_WAIT", # need to wait some cycles for the readback data to return from the device before latching it + If(txphy_cnt == 0, NextValue(self.cmd_rbk_data.fields.cmd_rbk_data, rbk_data), - NextValue(cmd_done, 1), # done with readback + NextState("TX_WR_RESET"), ).Else( NextValue(txphy_cnt, txphy_cnt - 1), ) @@ -783,11 +911,8 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): ) ) txphy.act("TX_WR_RESET", - NextValue(opi_rx_run, 0), NextValue(txphy_oe, 0), - NextValue(txphy_cs_n, 1), NextValue(txphy_clken, 0), - NextValue(cmd_done, 0), # drain any excess values in the page FIFO If(self.txwr_fifo.readable, self.txwr_fifo.re.eq(1), @@ -806,6 +931,7 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): NextValue(tx_run, 0), NextValue(cmd_run, 0), NextValue(txcmd_cs_n, 1), + NextValue(run_is_hot, 0), If(~spi_mode, NextState("IDLE") ).Else( @@ -830,8 +956,12 @@ class 
S7SPIOPI(Module, AutoCSR, AutoDoc): # - if so, wait until the current bus cycle is done, then de-assert tx_run # - then run the command # - Else wait until a bus cycle, and once it happens, put the system into run mode - If(bus.cyc & bus.stb, + If(bus.cyc & bus.stb & ~self.command.fields.lock_reads, If(~bus.we & ((bus.cti == 2) | (bus.cti == 0)), + If(~run_is_hot, + NextValue(opi_addr, Cat(Signal(2), bus.adr)), + ), + NextValue(run_is_hot, 1), NextState("TX_RUN") ).Else( # Handle other cases here, e.g. what do we do if we get a write? probably @@ -844,7 +974,7 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): ) opicmd.act("TX_RUN", NextValue(tx_run, 1), - If(cmd_req, # Respond to commands + If(cmd_req | self.command.fields.lock_reads, # Respond to commands NextState("WAIT_DISPATCH") ) ) @@ -852,19 +982,26 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): opicmd.act("WAIT_DISPATCH", If( ~(bus.cyc & bus.stb), NextValue(tx_run, 0), + NextValue(cmd_run, 1), NextState("DISPATCH_CMD") ) ) opicmd.act("DISPATCH_CMD", cmd_ack.eq(1), # clear the command dispatch pulse cache If(cmd_done, + NextValue(run_is_hot, 0), NextValue(cmd_run, 0), - NextState("TX_RUN"), + NextValue(tx_run, 0), + NextState("IDLE"), ).Else( NextValue(cmd_run, 1), ) ) + ############################################################################################ + ############################################################################################ + ############################################################################################ + ############################################################################################ # MAC/PHY abstraction for the SPI machine spi_req = Signal() spi_ack = Signal() From fc59bcd8338c8c39f4e694e7a8f15a02f0914500 Mon Sep 17 00:00:00 2001 From: bunnie Date: Fri, 6 Nov 2020 04:43:23 +0800 Subject: [PATCH 4/6] add facility for burst writing and fix pp4b command bug --- litex/soc/cores/spi_opi.py | 49 +++++++++++++++++++++++++------------- 1 file changed, 32 
insertions(+), 17 deletions(-) diff --git a/litex/soc/cores/spi_opi.py b/litex/soc/cores/spi_opi.py index b441322a8..4b234f820 100644 --- a/litex/soc/cores/spi_opi.py +++ b/litex/soc/cores/spi_opi.py @@ -484,7 +484,7 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): CSRField("has_arg", size=1, description="When set, transmits the value of `cmd_arg` as the argument to the command"), # CSRField("write_cmd", size=1, description="When `1`, `data_bytes` are written from page FIFO; when `0`, up to 4 STR `data_bytes` are read into readback CSR"), CSRField("dummy_cycles", size=5, description="Number of dummy cycles for manual command; 0 implies a write, >0 implies read"), - CSRField("data_words", size=7, description="Number of data words (2x bytes)"), + CSRField("data_words", size=8, description="Number of data words (2x bytes)"), CSRField("lock_reads", size=1, description="When set, locks out read operations (recommended when doing programming)"), ]) self.cmd_arg = CSRStorage(description="Command argument", @@ -610,24 +610,37 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): #--------- Page write data responder ----------------------- self.submodules.txwr_fifo = SyncFIFOBuffered(width=16, depth=128) - """ - got_wb_wr = Signal() - got_wb_wr_r = Signal() + self.submodules.pgwr = pgwr = FSM(reset_state="IDLE") + pgwr.act("IDLE", + If(self.wdata.re, + self.txwr_fifo.we.eq(1), + self.txwr_fifo.din.eq(self.wdata.fields.wdata) + ).Elif(bus.cyc & bus.stb & bus.we, + self.txwr_fifo.din.eq(bus.dat_w[:16]), # lower 16 bits first + self.txwr_fifo.we.eq(1), + NextState("HIWORD") + ).Else( + self.txwr_fifo.we.eq(0), + bus_ack_w.eq(0) + ) + ) + pgwr.act("HIWORD", + self.txwr_fifo.din.eq(bus.dat_w[16:]), # top 16 next + self.txwr_fifo.we.eq(1), + bus_ack_w.eq(1), + NextState("WAIT_DONE") + ) + pgwr.act("WAIT_DONE", + If( ~(bus.cyc & bus.stb & bus.we), + NextState("IDLE"), + bus_ack_w.eq(0), + ).Else( + bus_ack_w.eq(1), + ) + ) self.comb += [ - self.txwr_fifo.din.eq(bus.dat_r[:16]), # lower 16 bits 
only used - got_wb_wr.eq(bus.cyc & bus.stb & bus.we), bus.ack.eq(bus_ack_r | bus_ack_w), ] - self.sync += [ - got_wb_wr_r.eq(got_wb_wr), - self.txwr_fifo.we.eq(got_wb_wr & ~got_wb_wr_r), - bus_ack_w.eq(got_wb_wr), - ]""" - self.comb += bus.ack.eq(bus_ack_r) - self.sync += [ - self.txwr_fifo.we.eq(self.wdata.re), - self.txwr_fifo.din.eq(self.wdata.fields.wdata), - ] #--------- OPI Rx Phy machine ------------------------------ self.submodules.rxphy = rxphy = FSM(reset_state="IDLE") @@ -868,9 +881,11 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): If(self.command.fields.dummy_cycles > 0, NextValue(txphy_cnt, self.command.fields.dummy_cycles - 1), NextState("TX_MAN_DUMMY") - ).Else(# self.command.fields.write_cmd, # write is implied if dummy cycles is 0 + ).Elif(self.command.fields.data_words > 0, # self.command.fields.write_cmd, # write is implied if dummy cycles is 0 and data exists NextValue(txwr_cnt, self.command.fields.data_words - 1), NextState("TX_WRDATA") + ).Else( + NextState("TX_WR_RESET") ) ) txphy.act("TX_MAN_DUMMY", From d892c6f8f57f005822274b331fe76d696e3144e7 Mon Sep 17 00:00:00 2001 From: bunnie Date: Sat, 7 Nov 2020 03:57:46 +0800 Subject: [PATCH 5/6] minor bug fixes in spi writing; USB-based flashing is not working --- litex/soc/cores/spi_opi.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/litex/soc/cores/spi_opi.py b/litex/soc/cores/spi_opi.py index 4b234f820..0ba76f369 100644 --- a/litex/soc/cores/spi_opi.py +++ b/litex/soc/cores/spi_opi.py @@ -502,7 +502,10 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): ]) self.wdata = CSRStorage(description="Page data to write to FLASH", fields = [ - CSRField("wdata", size=16, description="16-bit wide write data presented to FLASH, committed to a 128-entry deep FIFO") + CSRField("wdata", size=16, description="""16-bit wide write data presented to FLASH, committed to a 128-entry deep FIFO. 
+ Writes to this register are not cached; note that writes to the SPINOR address space are also committed + to the FIFO, but this space is cached by the CPU, and therefore not guaranteed to be coherent or in order. + The direct wishbone-write address space is provisioned for e.g. USB bus masters that don't have caching.""") ] ) # TODO: implement ECC detailed register readback, CRC checking @@ -918,6 +921,7 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): ) txphy.act("TX_WRDATA", If(txwr_cnt == 0, + NextValue(txphy_do, self.txwr_fifo.dout), NextState("TX_WR_RESET"), ).Else( NextValue(txwr_cnt, txwr_cnt - 1), From 036ea48a4d66d9e4c898799248ce23470232c1b6 Mon Sep 17 00:00:00 2001 From: bunnie Date: Mon, 9 Nov 2020 16:43:01 +0800 Subject: [PATCH 6/6] update constraints to be in-line with litex methodology --- litex/soc/cores/spi_opi.py | 43 +++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/litex/soc/cores/spi_opi.py b/litex/soc/cores/spi_opi.py index 0ba76f369..1231fc8d2 100644 --- a/litex/soc/cores/spi_opi.py +++ b/litex/soc/cores/spi_opi.py @@ -14,19 +14,7 @@ from litex.soc.integration.doc import AutoDoc, ModuleDoc class S7SPIOPI(Module, AutoCSR, AutoDoc): - def __init__(self, platform, padgroup_name, - dq_delay_taps = 0, - sclk_name = "SCLK_ODDR", - iddr_name = "SPI_IDDR", - cipo_name = "CIPO_FDRE", - sim = False, - spiread = False, - prefetch_lines = 1): - - pads = platform.request(padgroup_name) - self.dq = dq = TSTriple(7) # dq[0] is special because it is also copi - self.dq_copi = dq_copi = TSTriple(1) # this has similar structure but an independent "oe" signal - + def add_timing_constraints(self, platform, padgroup_name): # reminder to self: the {{ and }} overloading is because Python treats these as special in strings, so {{ -> { in actual constraint # NOTE: ECSn is deliberately not constrained -- it's more or less async (0-10ns delay on the signal, only meant to line up with "block" region @@ -42,7 +30,7 @@ 
class S7SPIOPI(Module, AutoCSR, AutoDoc): # derive clock for SCLK - clock-forwarded from DDR see Xilinx answer 62488 use case #4 platform.add_platform_command( "create_generated_clock -name spiclk_out -multiply_by 1 -source [get_pins {}/Q] [get_ports {}_sclk]".format( - sclk_name, padgroup_name)) + self.sclk_name, padgroup_name)) # constrain CIPO SDR delay -- WARNING: -max is 'actually' 5.0ns, but design can't meet timing @ 5.0 tPD from SPIROM. There is some margin in the timing closure tho, so 4.5ns is probably going to work.... platform.add_platform_command( @@ -51,11 +39,11 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): "set_input_delay -clock [get_clocks spiclk_out] -clock_fall -min 1 [get_ports {}_dq[1]]".format(padgroup_name)) # corresponding false path on CIPO DDR input when clocking SDR data platform.add_platform_command( - "set_false_path -from [get_clocks spiclk_out] -to [get_pin {}/D ]".format(iddr_name + "1")) + "set_false_path -from [get_clocks spiclk_out] -to [get_pin {}/D ]".format(self.iddr_name + "1")) # corresponding false path on CIPO SDR input from DQS strobe, only if the cipo path is used - if spiread: + if self.spiread: platform.add_platform_command( - "set_false_path -from [get_clocks spidqs] -to [get_pin {}/D ]".format(cipo_name)) + "set_false_path -from [get_clocks spidqs] -to [get_pin {}/D ]".format(self.cipo_name)) # constrain CLK-to-DQ output DDR delays; copi uses the same rules platform.add_platform_command( @@ -72,9 +60,26 @@ class S7SPIOPI(Module, AutoCSR, AutoDoc): platform.add_platform_command( "set_output_delay -clock [get_clocks spiclk_out] -max 1 [get_ports {}_cs_n]".format(padgroup_name)) # 4.5 in reality # unconstrain OE path - we have like 10+ dummy cycles to turn the bus on wr->rd, and 2+ cycles to turn on end of read - platform.add_platform_command("set_false_path -through [ get_pins {net}_reg/Q ]", net=dq.oe) + platform.add_platform_command("set_false_path -through [ get_pins {net}_reg/Q ]", net=self.dq.oe) 
platform.add_platform_command("set_false_path -through [ get_pins {net}_reg/Q ]", - net=dq_copi.oe) + net=self.dq_copi.oe) + + def __init__(self, pads, + dq_delay_taps = 0, + sclk_name = "SCLK_ODDR", + iddr_name = "SPI_IDDR", + cipo_name = "CIPO_FDRE", + sim = False, + spiread = False, + prefetch_lines = 1): + + self.sclk_name = sclk_name + self.iddr_name = iddr_name + self.cipo_name = cipo_name + self.spiread = spiread + + self.dq = dq = TSTriple(7) # dq[0] is special because it is also copi + self.dq_copi = dq_copi = TSTriple(1) # this has similar structure but an independent "oe" signal self.intro = ModuleDoc("""Intro