Merge pull request #478 from antmicro/extended_spi_flash

Extended SPI flash support
This commit is contained in:
enjoy-digital 2020-05-12 16:42:01 +02:00 committed by GitHub
commit 2a5a7536b8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 140 additions and 49 deletions

View File

@ -2,6 +2,7 @@
# This file is Copyright (c) 2014-2018 Florent Kermarrec <florent@enjoy-digital.fr>
# This file is Copyright (c) 2013-2014 Robert Jordens <jordens@gmail.com>
# This file is Copyright (c) 2015-2014 Sebastien Bourdeauducq <sb@m-labs.hk>
# This file is Copyright (c) 2020 Antmicro <www.antmicro.com>
# License: BSD
@ -18,16 +19,13 @@ from litex.soc.cores.spi import SPIMaster
# SpiFlash Quad/Dual/Single (memory-mapped) --------------------------------------------------------
_FAST_READ = 0x0b
_DIOFR = 0xbb
_QIOFR = 0xeb
_QIOPP = 0x12
def _format_cmd(cmd, spi_width):
"""
`cmd` is the read instruction. Since everything is transmitted on all
dq lines (cmd, adr and data), extend/interleave cmd to full pads.dq
width even if dq1-dq3 are don't care during the command phase:
dq lines (cmd, adr and data), we need to reformat cmd in single spi mode.
For example, for N25Q128, 0xeb is the quad i/o fast read, and
extended to 4 bits (dq1,dq2,dq3 high) is: 0xfffefeff
"""
@ -37,6 +35,12 @@ def _format_cmd(cmd, spi_width):
c &= ~(1<<(b*spi_width))
return c
def accumulate_timeline_deltas(seq):
t, tseq = 0, []
for dt, a in seq:
tseq.append((t, a))
t += dt
return tseq
class SpiFlashCommon(Module):
def __init__(self, pads):
@ -85,6 +89,7 @@ class SpiFlashDualQuad(SpiFlashCommon, AutoCSR):
SpiFlashCommon.__init__(self, pads)
self.bus = bus = wishbone.Interface()
spi_width = len(pads.dq)
max_transfer_size = 8*8
assert spi_width >= 2
if with_bitbang:
@ -104,28 +109,24 @@ class SpiFlashDualQuad(SpiFlashCommon, AutoCSR):
self.miso = CSRStatus(description="Incoming value of MISO signal.")
self.bitbang_en = CSRStorage(description="Write a ``1`` here to disable memory-mapped mode and enable bitbang mode.")
# # #
queue = self.queue = CSRStatus(4)
in_len = self.in_len = CSRStorage(4)
out_len = self.out_len = CSRStorage(4)
in_left = self.in_left = Signal(max=2**8)
out_left = self.out_left = Signal(max=2**8)
self.quad_transfer = Signal(reset=0)
spi_in = self.spi_in = CSRStorage(max_transfer_size)
spi_out = self.spi_out = CSRStatus(max_transfer_size)
cs_n = Signal(reset=1)
clk = Signal()
dq_oe = Signal()
wbone_width = len(bus.dat_r)
read_cmd_params = {
4: (_format_cmd(_QIOFR, 4), 4*8),
2: (_format_cmd(_DIOFR, 2), 2*8),
1: (_format_cmd(_FAST_READ, 1), 1*8)
}
read_cmd, cmd_width = read_cmd_params[spi_width]
cmd_width = 8
addr_width = 24
dq = TSTriple(spi_width)
# Keep DQ2,DQ3 as outputs during bitbang, this ensures they activate ~WP or ~HOLD functions
self.specials.dq0 = Tristate(pads.dq[0], o=dq.o[0], i=dq.i[0], oe=dq.oe)
self.specials.dq1 = Tristate(pads.dq[1], o=dq.o[1], i=dq.i[1], oe=dq.oe)
self.specials.dq2 = Tristate(pads.dq[2], o=dq.o[2], i=dq.i[2], oe=(dq.oe | self.bitbang_en.storage))
self.specials.dq3 = Tristate(pads.dq[3], o=dq.o[3], i=dq.i[3], oe=(dq.oe | self.bitbang_en.storage))
sr = Signal(max(cmd_width, addr_width, wbone_width))
if endianness == "big":
@ -133,12 +134,34 @@ class SpiFlashDualQuad(SpiFlashCommon, AutoCSR):
else:
self.comb += bus.dat_r.eq(reverse_bytes(sr))
hw_read_logic = [
self.specials.dq0 = Tristate(pads.dq[0], o=dq.o[0], i=dq.i[0], oe=dq.oe)
self.specials.dq1 = Tristate(pads.dq[1], o=dq.o[1], i=dq.i[1], oe=dq.oe)
if with_bitbang:
# Keep DQ2,DQ3 as outputs during bitbang, this ensures they activate ~WP or ~HOLD functions
self.specials.dq2 = Tristate(pads.dq[2], o=dq.o[2], i=dq.i[2], oe=(dq.oe | self.bitbang_en.storage))
self.specials.dq3 = Tristate(pads.dq[3], o=dq.o[3], i=dq.i[3], oe=(dq.oe | self.bitbang_en.storage))
else:
self.specials.dq2 = Tristate(pads.dq[2], o=dq.o[2], i=dq.i[2], oe=dq.oe)
self.specials.dq3 = Tristate(pads.dq[3], o=dq.o[3], i=dq.i[3], oe=dq.oe)
sr = Signal(max(cmd_width, addr_width, wbone_width, max_transfer_size))
if endianness == "big":
self.comb += bus.dat_r.eq(sr[:wbone_width])
else:
self.comb += bus.dat_r.eq(reverse_bytes(sr[:wbone_width]))
hw_read_logic_single = [
pads.clk.eq(clk),
pads.cs_n.eq(cs_n),
dq.o.eq(sr[-spi_width:]),
dq.oe.eq(dq_oe)
]
hw_read_logic_quad = [
pads.clk.eq(clk),
pads.cs_n.eq(cs_n),
dq.o.eq(Cat(sr[-1:], Replicate(1, 3))),
dq.oe.eq(dq_oe)
]
if with_bitbang:
bitbang_logic = [
@ -165,58 +188,126 @@ class SpiFlashDualQuad(SpiFlashCommon, AutoCSR):
self.comb += [
If(self.bitbang_en.storage,
bitbang_logic
).Elif(self.quad_transfer,
hw_read_logic_single
).Else(
hw_read_logic
hw_read_logic_quad
)
]
else:
self.comb += hw_read_logic
self.comb += [
If(self.quad_transfer,
hw_read_logic_single
).Else(
hw_read_logic_quad
)
]
if div < 2:
raise ValueError("Unsupported value \'{}\' for div parameter for SpiFlash core".format(div))
else:
i = Signal(max=div)
dqi = Signal(spi_width)
self.sync += [
If(i == div//2 - 1,
clk.eq(1),
dqi.eq(dq.i),
),
If(i == div - 1,
i.eq(0),
clk.eq(0),
sr.eq(Cat(dqi, sr[:-spi_width]))
).Else(
i.eq(i + 1),
),
]
# spi is byte-addressed, prefix by zeros
z = Replicate(0, log2_int(wbone_width//8))
i = Signal(max=div)
dqi = Signal(spi_width)
seq = [
(cmd_width//spi_width*div,
[dq_oe.eq(1), cs_n.eq(0), sr[-cmd_width:].eq(read_cmd)]),
# SPI or memmap mode
self.mode = Signal()
self.sync += [
If(i == div//2 - 1,
clk.eq(1),
dqi.eq(dq.i),
),
If(i == div - 1,
i.eq(0),
clk.eq(0),
If(self.quad_transfer,
sr.eq(Cat(dqi, sr[:-spi_width]))
).Else(
sr.eq(Cat(dqi[1], sr[:-1]))
)
).Else(
i.eq(i + 1),
),
]
read_seq = [
(4*cmd_width//spi_width*div,
[dq_oe.eq(1), cs_n.eq(0), sr[-cmd_width:].eq(_QIOFR), self.quad_transfer.eq(0)]),
(addr_width//spi_width*div,
[sr[-addr_width:].eq(Cat(z, bus.adr))]),
((dummy + wbone_width//spi_width)*div,
[sr[-addr_width:].eq(Cat(z, bus.adr)), self.quad_transfer.eq(1)]),
((1+dummy + wbone_width//spi_width)*div,
[dq_oe.eq(0)]),
(1,
[bus.ack.eq(1), cs_n.eq(1)]),
(div, # tSHSL!
[bus.ack.eq(0)]),
(0,
[]),
[queue.status[0].eq(0)]),
]
# accumulate timeline deltas
t, tseq = 0, []
for dt, a in seq:
tseq.append((t, a))
t += dt
self.sync += timeline(bus.cyc & bus.stb & (i == div - 1), tseq)
write_seq = [
(4*cmd_width//spi_width*div,
[dq_oe.eq(1), cs_n.eq(0), sr[-cmd_width:].eq(_QIOPP), self.quad_transfer.eq(0)]),
(addr_width//spi_width*div,
[sr[-addr_width:].eq(Cat(z, bus.adr)), self.quad_transfer.eq(1)]),
((wbone_width//spi_width)*div,
[sr[-wbone_width:].eq(reverse_bytes(bus.dat_w))]),
(1,
[bus.ack.eq(1), cs_n.eq(1)]),
(div,
[bus.ack.eq(0)]),
(0,
[queue.status[1].eq(0)]),
]
# prepare spi transfer
self.sync += If(self.out_len.re & (self.out_len.storage != 0) & self.en_quad.storage[0],
self.out_left.eq(Cat(1, self.out_len.storage)
)
)
self.sync += If(self.out_len.re & (self.out_len.storage == 0),
self.out_left.eq(0)
)
self.sync += If(self.out_len.re & (self.out_len.storage != 0) & ~self.en_quad.storage[0],
self.out_left.eq(Cat(1, Replicate(0, 2), self.out_len.storage))
)
self.sync += If(self.in_len.re & (self.in_len.storage != 0) & ~self.en_quad.storage[0],
[queue.status[2].eq(1),
self.in_left.eq(Cat(Replicate(0, 3), in_len.storage)),
self.quad_transfer.eq(0)]
)
# write data to sr
self.sync += If(queue.status[2] & (i == div - 1) & ~self.en_quad.storage[0],
sr[-max_transfer_size:].eq(self.spi_in.storage), queue.status[2].eq(0), queue.status[3].eq(1), cs_n.eq(0), dq_oe.eq(1))
# count spi to slave transfer cycles
self.sync += If(queue.status[3] & (self.in_left > 0) & (i == div - 1), self.in_left.eq(self.in_left - 1), dq_oe.eq(1))
# count spi to master transfer cycles
self.sync += If(queue.status[3] & (self.in_left < 1) & (self.out_left > 0) & (i == div - 1), self.out_left.eq(self.out_left - 1), dq_oe.eq(0))
#end transmision and read data from sr
self.sync += If(~self.in_len.re & (in_left < 1) & (out_left < 1) & queue.status[3], queue.status[3].eq(0), cs_n.eq(1),
If(self.out_len.storage == 1, self.spi_out.status.eq(Cat(Replicate(0, 8*7), sr))
).Elif(self.out_len.storage == 2, self.spi_out.status.eq(Cat(Replicate(0, 8*6), sr))
).Elif(self.out_len.storage == 3, self.spi_out.status.eq(Cat(Replicate(0, 8*5), sr))
).Elif(self.out_len.storage == 4, self.spi_out.status.eq(Cat(Replicate(0, 8*4), sr))
).Elif(self.out_len.storage == 5, self.spi_out.status.eq(Cat(Replicate(0, 8*3), sr))
).Elif(self.out_len.storage == 6, self.spi_out.status.eq(Cat(Replicate(0, 8*2), sr))
).Elif(self.out_len.storage == 7, self.spi_out.status.eq(Cat(Replicate(0, 8*1), sr))
).Else(self.spi_out.status.eq(sr)))
# detect mem map access
self.sync += If(~self.mode & bus.cyc & bus.stb & ~bus.we, queue.status[0].eq(1))
self.sync += If(~self.mode & bus.cyc & bus.stb & bus.we, queue.status[1].eq(1))
self.sync += timeline(queue.status[0] & ~self.en_quad.storage[0] & (i == div - 1), accumulate_timeline_deltas(read_seq))
self.sync += timeline(queue.status[1] & ~self.en_quad.storage[0] & (i == div - 1), accumulate_timeline_deltas(write_seq))
class SpiFlashSingle(SpiFlashCommon, AutoCSR):