Merge pull request #221 from enjoy-digital/write_latency

Add dynamic write latency calibration.
This commit is contained in:
enjoy-digital 2020-10-12 19:42:26 +02:00 committed by GitHub
commit 85fa02afc7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 124 additions and 41 deletions

View File

@ -97,6 +97,15 @@ class BenchSoC(SoCCore):
self.add_csr("ethphy")
self.add_etherbone(phy=self.ethphy)
# Analyzer ---------------------------------------------------------------------------------
from litescope import LiteScopeAnalyzer
analyzer_signals = [self.ddrphy.dfi]
self.submodules.analyzer = LiteScopeAnalyzer(analyzer_signals,
depth = 512,
clock_domain = "sys",
csr_csv = "analyzer.csv")
self.add_csr("analyzer")
# Leds -------------------------------------------------------------------------------------
from litex.soc.cores.led import LedChaser
self.submodules.leds = LedChaser(

View File

@ -117,23 +117,24 @@ class PHYPadsCombiner:
# BitSlip ------------------------------------------------------------------------------------------
class BitSlip(Module):
def __init__(self, dw, rst=None, slp=None, cycles=1):
self.i = Signal(dw)
self.o = Signal(dw)
self.rst = Signal() if rst is None else rst
self.slp = Signal() if slp is None else slp
def __init__(self, dw, i=None, o=None, rst=None, slp=None, cycles=1):
self.i = Signal(dw) if i is None else i
self.o = Signal(dw) if o is None else o
self.rst = Signal() if rst is None else rst
self.slp = Signal() if slp is None else slp
assert cycles >= 1
# # #
value = Signal(max=cycles*dw)
value = Signal(max=cycles*dw, reset=cycles*dw-1)
self.sync += If(self.slp, value.eq(value + 1))
self.sync += If(self.rst, value.eq(0))
self.sync += If(self.rst, value.eq(value.reset))
r = Signal((cycles+1)*dw, reset_less=True)
self.sync += r.eq(Cat(r[dw:], self.i))
cases = {}
for i in range(cycles*dw):
cases[i] = self.o.eq(r[i:dw+i])
cases[i] = self.o.eq(r[i+1:dw+i+1])
self.comb += Case(value, cases)
# TappedDelayLine ----------------------------------------------------------------------------------

View File

@ -507,7 +507,10 @@ def get_sdram_phy_c_header(phy_settings, timing_settings):
r += "#define SDRAM_PHY_WRITE_LEVELING_CAPABLE\n"
if phytype in ["USDDRPHY", "USPDDRPHY"]:
r += "#define SDRAM_PHY_WRITE_LEVELING_REINIT\n"
if phytype in ["USDDRPHY", "USPDDRPHY", "A7DDRPHY", "K7DDRPHY", "V7DDRPHY", "ECP5DDRPHY"]:
if phytype in ["USDDRPHY", "USPDDRPHY", "A7DDRPHY", "K7DDRPHY", "V7DDRPHY"]:
r += "#define SDRAM_PHY_WRITE_LATENCY_CALIBRATION_CAPABLE\n"
r += "#define SDRAM_PHY_READ_LEVELING_CAPABLE\n"
if phytype in ["ECP5DDRPHY"]:
r += "#define SDRAM_PHY_READ_LEVELING_CAPABLE\n"
# Define number of modules/delays/bitslips

View File

@ -24,6 +24,30 @@ from litex.soc.interconnect.csr import *
from litedram.common import *
from litedram.phy.dfi import *
# BitSlip ------------------------------------------------------------------------------------------
# FIXME: Use BitSlip from litedram.common.
# Local copy of BitSlip kept in the ECP5 PHY (see the FIXME above about
# switching to the litedram.common version).
#
# Keeps a (cycles+1)*dw-bit history register `r` of the input `i` and drives
# `o` with a dw-bit window of that history. The window offset is `value`,
# which is incremented while `slp` is asserted and set to 0 while `rst` is
# asserted.
#
# Parameters:
#   dw     : width of the `i` and `o` signals.
#   rst    : optional signal used as the offset reset; a new Signal() is
#            created when None.
#   slp    : optional signal used as the offset increment ("slip") request;
#            a new Signal() is created when None.
#   cycles : sizes the history register ((cycles+1)*dw bits) and the number
#            of selectable offsets (cycles*dw).
class BitSlip(Module):
def __init__(self, dw, rst=None, slp=None, cycles=1):
self.i = Signal(dw)
self.o = Signal(dw)
# Use the caller-supplied control signals when given, otherwise expose
# fresh ones as attributes.
self.rst = Signal() if rst is None else rst
self.slp = Signal() if slp is None else slp
# # #
# Window offset, declared with max=cycles*dw (one value per case below).
# NOTE(review): no explicit wrap-around is coded here; behaviour past the
# last offset relies on the counter width - confirm for non power-of-two
# cycles*dw.
value = Signal(max=cycles*dw)
self.sync += If(self.slp, value.eq(value + 1))
# Added after the increment: with Migen's "last assignment wins" rule for
# statements in the same domain, rst presumably takes priority over slp when
# both are asserted - confirm against the Migen documentation.
self.sync += If(self.rst, value.eq(0))
# History register; declared reset_less=True.
r = Signal((cycles+1)*dw, reset_less=True)
# Each sys cycle: discard the lowest dw bits of r and append the new input
# word (Cat is LSB-first in Migen, so self.i lands in the upper dw bits).
self.sync += r.eq(Cat(r[dw:], self.i))
# One case per offset: offset i selects bits [i, dw+i) of the history.
cases = {}
for i in range(cycles*dw):
cases[i] = self.o.eq(r[i:dw+i])
self.comb += Case(value, cases)
# Lattice ECP5 DDR PHY Initialization --------------------------------------------------------------
class ECP5DDRPHYInit(Module):
@ -108,6 +132,8 @@ class ECP5DDRPHY(Module, AutoCSR):
cwl_sys_latency = get_sys_latency(nphases, cwl)
# Registers --------------------------------------------------------------------------------
self._rst = CSRStorage()
self._dly_sel = CSRStorage(databits//8)
self._rdly_dq_rst = CSR()
@ -154,7 +180,7 @@ class ECP5DDRPHY(Module, AutoCSR):
for i in range(len(pads.clk_p)):
sd_clk_se = Signal()
self.specials += Instance("ODDRX2F",
i_RST = ResetSignal("sys"),
i_RST = ResetSignal("sys") | self._rst.storage,
i_SCLK = ClockSignal("sys"),
i_ECLK = ClockSignal("sys2x"),
**{f"i_D{n}": (0b1010 >> n) & 0b1 for n in range(4)},
@ -177,7 +203,7 @@ class ECP5DDRPHY(Module, AutoCSR):
pad = getattr(pads, pad_name)
for i in range(len(pad)):
self.specials += Instance("ODDRX2F",
i_RST = ResetSignal("sys"),
i_RST = ResetSignal("sys") | self._rst.storage,
i_SCLK = ClockSignal("sys"),
i_ECLK = ClockSignal("sys2x"),
**{f"i_D{n}": getattr(dfi.phases[n//2], dfi_name)[i] for n in range(4)},
@ -210,7 +236,7 @@ class ECP5DDRPHY(Module, AutoCSR):
p_DQS_LO_DEL_ADJ = "MINUS",
p_DQS_LO_DEL_VAL = 4,
# Clocks / Reset
i_RST = ResetSignal("sys"),
i_RST = ResetSignal("sys") | self._rst.storage,
i_SCLK = ClockSignal("sys"),
i_ECLK = ClockSignal("sys2x"),
i_DDRDEL = self.init.delay,
@ -252,7 +278,7 @@ class ECP5DDRPHY(Module, AutoCSR):
dqs_oe_n = Signal()
self.specials += [
Instance("ODDRX2DQSB",
i_RST = ResetSignal("sys"),
i_RST = ResetSignal("sys") | self._rst.storage,
i_SCLK = ClockSignal("sys"),
i_ECLK = ClockSignal("sys2x"),
i_DQSW = dqsw,
@ -260,7 +286,7 @@ class ECP5DDRPHY(Module, AutoCSR):
o_Q = dqs
),
Instance("TSHX2DQSA",
i_RST = ResetSignal("sys"),
i_RST = ResetSignal("sys") | self._rst.storage,
i_SCLK = ClockSignal("sys"),
i_ECLK = ClockSignal("sys2x"),
i_DQSW = dqsw,
@ -283,7 +309,7 @@ class ECP5DDRPHY(Module, AutoCSR):
dm_bl8_cases[1] = dm_o_data_muxed.eq(dm_o_data_d[4:])
self.sync += Case(bl8_chunk, dm_bl8_cases)
self.specials += Instance("ODDRX2DQA",
i_RST = ResetSignal("sys"),
i_RST = ResetSignal("sys") | self._rst.storage,
i_SCLK = ClockSignal("sys"),
i_ECLK = ClockSignal("sys2x"),
i_DQSW270 = dqsw270,
@ -310,7 +336,7 @@ class ECP5DDRPHY(Module, AutoCSR):
self.sync += Case(bl8_chunk, dq_bl8_cases)
self.specials += [
Instance("ODDRX2DQA",
i_RST = ResetSignal("sys"),
i_RST = ResetSignal("sys") | self._rst.storage,
i_SCLK = ClockSignal("sys"),
i_ECLK = ClockSignal("sys2x"),
i_DQSW270 = dqsw270,
@ -319,7 +345,7 @@ class ECP5DDRPHY(Module, AutoCSR):
)
]
dq_i_bitslip = BitSlip(4,
rst = self._dly_sel.storage[i] & self._rdly_dq_bitslip_rst.re,
rst = (self._dly_sel.storage[i] & self._rdly_dq_bitslip_rst.re) | self._rst.storage,
slp = self._dly_sel.storage[i] & self._rdly_dq_bitslip.re,
cycles = 1)
self.submodules += dq_i_bitslip
@ -333,7 +359,7 @@ class ECP5DDRPHY(Module, AutoCSR):
o_Z = dq_i_delayed
),
Instance("IDDRX2DQA",
i_RST = ResetSignal("sys"),
i_RST = ResetSignal("sys") | self._rst.storage,
i_SCLK = ClockSignal("sys"),
i_ECLK = ClockSignal("sys2x"),
i_DQSR90 = dqsr90,
@ -350,7 +376,7 @@ class ECP5DDRPHY(Module, AutoCSR):
self.comb += dfi.phases[n//4].rddata[n%4*databits+j].eq(dq_i_data[n])
self.specials += [
Instance("TSHX2DQA",
i_RST = ResetSignal("sys"),
i_RST = ResetSignal("sys") | self._rst.storage,
i_SCLK = ClockSignal("sys"),
i_ECLK = ClockSignal("sys2x"),
i_DQSW270 = dqsw270,

View File

@ -29,7 +29,7 @@ class S7DDRPHY(Module, AutoCSR):
nphases = 4,
sys_clk_freq = 100e6,
iodelay_clk_freq = 200e6,
cmd_latency = 0,
cmd_latency = 1,
cmd_delay = None):
assert not (memtype == "DDR3" and nphases == 2)
phytype = self.__class__.__name__
@ -82,6 +82,9 @@ class S7DDRPHY(Module, AutoCSR):
self._wdly_dqs_rst = CSR()
self._wdly_dqs_inc = CSR()
self._wdly_dq_bitslip_rst = CSR()
self._wdly_dq_bitslip = CSR()
self._rdphase = CSRStorage(int(math.log2(nphases)), reset=rdphase)
self._wrphase = CSRStorage(int(math.log2(nphases)), reset=wrphase)
@ -98,7 +101,7 @@ class S7DDRPHY(Module, AutoCSR):
cl = cl,
cwl = cwl,
read_latency = cl_sys_latency + 6,
write_latency = cwl_sys_latency,
write_latency = cwl_sys_latency - 1,
cmd_latency = cmd_latency,
cmd_delay = cmd_delay,
)
@ -208,8 +211,8 @@ class S7DDRPHY(Module, AutoCSR):
dqs_postamble = Signal()
dqs_oe_delay = TappedDelayLine(ntaps=2 if nphases == 4 else 1)
dqs_pattern = DQSPattern(
preamble = dqs_preamble,
postamble = dqs_postamble,
#preamble = dqs_preamble, # FIXME
#postamble = dqs_postamble, # FIXME
wlevel_en = self._wlevel_en.storage,
wlevel_strobe = self._wlevel_strobe.re,
register = not with_odelay)
@ -219,6 +222,12 @@ class S7DDRPHY(Module, AutoCSR):
dqs_o_no_delay = Signal()
dqs_o_delayed = Signal()
dqs_t = Signal()
dqs_bitslip = BitSlip(8,
i = dqs_pattern.o,
rst = (self._dly_sel.storage[i] & self._wdly_dq_bitslip_rst.re) | self._rst.storage,
slp = self._dly_sel.storage[i] & self._wdly_dq_bitslip.re,
cycles = 1)
self.submodules += dqs_bitslip
self.specials += Instance("OSERDESE2",
p_SERDES_MODE = "MASTER",
p_DATA_WIDTH = 2*nphases,
@ -228,7 +237,7 @@ class S7DDRPHY(Module, AutoCSR):
i_RST = ResetSignal() | self._rst.storage,
i_CLK = ClockSignal(ddr_clk) if with_odelay else ClockSignal(ddr_clk+"_dqs"),
i_CLKDIV = ClockSignal(),
**{f"i_D{n+1}": dqs_pattern.o[n] for n in range(8)},
**{f"i_D{n+1}": dqs_bitslip.o[n] for n in range(8)},
i_OCE = 1,
o_OFB = dqs_o_no_delay if with_odelay else Signal(),
o_OQ = Signal() if with_odelay else dqs_o_no_delay,
@ -264,6 +273,12 @@ class S7DDRPHY(Module, AutoCSR):
# DM ---------------------------------------------------------------------------------------
for i in range(databits//8):
dm_o_nodelay = Signal()
dm_o_bitslip = BitSlip(8,
i = Cat(*[dfi.phases[n//2].wrdata_mask[n%2*databits//8+i] for n in range(8)]),
rst = (self._dly_sel.storage[i] & self._wdly_dq_bitslip_rst.re) | self._rst.storage,
slp = self._dly_sel.storage[i] & self._wdly_dq_bitslip.re,
cycles = 1)
self.submodules += dm_o_bitslip
self.specials += Instance("OSERDESE2",
p_SERDES_MODE = "MASTER",
p_DATA_WIDTH = 2*nphases,
@ -273,7 +288,7 @@ class S7DDRPHY(Module, AutoCSR):
i_RST = ResetSignal() | self._rst.storage,
i_CLK = ClockSignal(ddr_clk),
i_CLKDIV = ClockSignal(),
**{f"i_D{n+1}": dfi.phases[n//2].wrdata_mask[n%2*databits//8+i] for n in range(8)},
**{f"i_D{n+1}": dm_o_bitslip.o[n] for n in range(8)},
i_OCE = 1,
o_OQ = dm_o_nodelay if with_odelay else pads.dm[i],
)
@ -308,6 +323,12 @@ class S7DDRPHY(Module, AutoCSR):
dq_i_delayed = Signal()
dq_t = Signal()
dq_i_data = Signal(8)
dq_o_bitslip = BitSlip(8,
i = Cat(*[dfi.phases[n//2].wrdata[n%2*databits+i] for n in range(8)]),
rst = (self._dly_sel.storage[i//8] & self._wdly_dq_bitslip_rst.re) | self._rst.storage,
slp = self._dly_sel.storage[i//8] & self._wdly_dq_bitslip.re,
cycles = 1)
self.submodules += dq_o_bitslip
self.specials += Instance("OSERDESE2",
p_SERDES_MODE = "MASTER",
p_DATA_WIDTH = 2*nphases,
@ -317,7 +338,7 @@ class S7DDRPHY(Module, AutoCSR):
i_RST = ResetSignal() | self._rst.storage,
i_CLK = ClockSignal(ddr_clk),
i_CLKDIV = ClockSignal(),
**{f"i_D{n+1}": dfi.phases[n//2].wrdata[n%2*databits+i] for n in range(8)},
**{f"i_D{n+1}": dq_o_bitslip.o[n] for n in range(8)},
i_TCE = 1,
i_T1 = ~dq_oe_delay.output,
o_TQ = dq_t,
@ -325,7 +346,7 @@ class S7DDRPHY(Module, AutoCSR):
o_OQ = dq_o_nodelay,
)
dq_i_bitslip = BitSlip(8,
rst = self._dly_sel.storage[i//8] & self._rdly_dq_bitslip_rst.re,
rst = (self._dly_sel.storage[i//8] & self._rdly_dq_bitslip_rst.re) | self._rst.storage,
slp = self._dly_sel.storage[i//8] & self._rdly_dq_bitslip.re,
cycles = 1)
self.submodules += dq_i_bitslip
@ -427,17 +448,17 @@ class S7DDRPHY(Module, AutoCSR):
# Xilinx Virtex7 (S7DDRPHY with odelay) ------------------------------------------------------------
class V7DDRPHY(S7DDRPHY):
def __init__(self, pads, cmd_latency=1, **kwargs):
def __init__(self, pads, **kwargs):
S7DDRPHY.__init__(self, pads, with_odelay=True, **kwargs)
# Xilinx Kintex7 (S7DDRPHY with odelay) ------------------------------------------------------------
class K7DDRPHY(S7DDRPHY):
def __init__(self, pads, cmd_latency=1, **kwargs):
S7DDRPHY.__init__(self, pads, cmd_latency=cmd_latency, with_odelay=True, **kwargs)
def __init__(self, pads, **kwargs):
S7DDRPHY.__init__(self, pads, with_odelay=True, **kwargs)
# Xilinx Artix7 (S7DDRPHY without odelay, sys2/4x_dqs generated in CRG with 90° phase vs sys2/4x) --
class A7DDRPHY(S7DDRPHY):
def __init__(self, pads, cmd_latency=0, **kwargs):
S7DDRPHY.__init__(self, pads, cmd_latency=0, with_odelay=False, **kwargs)
def __init__(self, pads, **kwargs):
S7DDRPHY.__init__(self, pads, with_odelay=False, cmd_latency=0, **kwargs)

View File

@ -81,6 +81,9 @@ class USDDRPHY(Module, AutoCSR):
self._wdly_dqs_rst = CSR()
self._wdly_dqs_inc = CSR()
self._wdly_dq_bitslip_rst = CSR()
self._wdly_dq_bitslip = CSR()
self._rdphase = CSRStorage(2, reset=rdphase)
self._wrphase = CSRStorage(2, reset=wrphase)
@ -97,7 +100,7 @@ class USDDRPHY(Module, AutoCSR):
cl = cl,
cwl = cwl,
read_latency = cl_sys_latency + 5,
write_latency = cwl_sys_latency,
write_latency = cwl_sys_latency - 1,
cmd_latency = cmd_latency,
cmd_delay = cmd_delay,
)
@ -228,8 +231,8 @@ class USDDRPHY(Module, AutoCSR):
dqs_postamble = Signal()
dqs_oe_delay = TappedDelayLine(ntaps=1)
dqs_pattern = DQSPattern(
preamble = dqs_preamble,
postamble = dqs_postamble,
#preamble = dqs_preamble, # FIXME
#postamble = dqs_postamble, # FIXME
wlevel_en = self._wlevel_en.storage,
wlevel_strobe = self._wlevel_strobe.re)
self.submodules += dqs_oe_delay, dqs_pattern
@ -247,6 +250,12 @@ class USDDRPHY(Module, AutoCSR):
dqs_taps_done.eq(1),
self._half_sys8x_taps.status.eq(dqs_taps)
)
dqs_bitslip = BitSlip(8,
i = dqs_pattern.o,
rst = (self._dly_sel.storage[i] & self._wdly_dq_bitslip_rst.re) | self._rst.storage,
slp = self._dly_sel.storage[i] & self._wdly_dq_bitslip.re,
cycles = 1)
self.submodules += dqs_bitslip
if x4_dimm_mode:
dqs_pads = ((pads.dqs_p[i*2], pads.dqs_n[i*2]), (pads.dqs_p[i*2 + 1], pads.dqs_n[i*2 + 1]))
else:
@ -267,7 +276,7 @@ class USDDRPHY(Module, AutoCSR):
i_CLK = ClockSignal("sys4x"),
i_CLKDIV = ClockSignal(),
i_T = ~dqs_oe_delay.output,
i_D = dqs_pattern.o,
i_D = dqs_bitslip.o,
o_OQ = dqs_nodelay,
o_T_OUT = dqs_t,
@ -303,6 +312,12 @@ class USDDRPHY(Module, AutoCSR):
for i in range(databits//8):
if hasattr(pads, "dm"):
dm_o_nodelay = Signal()
dm_o_bitslip = BitSlip(8,
i = Cat(*[dfi.phases[n//2].wrdata_mask[n%2*databits//8+i] for n in range(8)]),
rst = (self._dly_sel.storage[i] & self._wdly_dq_bitslip_rst.re) | self._rst.storage,
slp = self._dly_sel.storage[i] & self._wdly_dq_bitslip.re,
cycles = 1)
self.submodules += dm_o_bitslip
self.specials += [
Instance("OSERDESE3",
p_SIM_DEVICE = device,
@ -314,7 +329,7 @@ class USDDRPHY(Module, AutoCSR):
i_RST = ResetSignal() | self._rst.storage,
i_CLK = ClockSignal("sys4x"),
i_CLKDIV = ClockSignal(),
i_D = Cat(*[dfi.phases[n//2].wrdata_mask[n%2*databits//8+i] for n in range(8)]),
i_D = dm_o_bitslip.o,
o_OQ = dm_o_nodelay,
),
Instance("ODELAYE3",
@ -348,6 +363,12 @@ class USDDRPHY(Module, AutoCSR):
dq_i_nodelay = Signal()
dq_i_delayed = Signal()
dq_t = Signal()
dq_o_bitslip = BitSlip(8,
i = Cat(*[dfi.phases[n//2].wrdata[n%2*databits+i] for n in range(8)]),
rst = (self._dly_sel.storage[i//8] & self._wdly_dq_bitslip_rst.re) | self._rst.storage,
slp = self._dly_sel.storage[i//8] & self._wdly_dq_bitslip.re,
cycles = 1)
self.submodules += dq_o_bitslip
self.specials += Instance("OSERDESE3",
p_SIM_DEVICE = device,
p_DATA_WIDTH = 8,
@ -358,13 +379,13 @@ class USDDRPHY(Module, AutoCSR):
i_RST = ResetSignal() | self._rst.storage,
i_CLK = ClockSignal("sys4x"),
i_CLKDIV = ClockSignal(),
i_D = Cat(*[dfi.phases[n//2].wrdata[n%2*databits+i] for n in range(8)]),
i_D = dq_o_bitslip.o,
i_T = ~dq_oe_delay.output,
o_OQ = dq_o_nodelay,
o_T_OUT = dq_t,
)
dq_i_bitslip = BitSlip(8,
rst = self._dly_sel.storage[i//8] & self._rdly_dq_bitslip_rst.re,
rst = (self._dly_sel.storage[i//8] & self._rdly_dq_bitslip_rst.re) | self._rst.storage,
slp = self._dly_sel.storage[i//8] & self._rdly_dq_bitslip.re,
cycles = 1)
self.submodules += dq_i_bitslip
@ -465,5 +486,5 @@ class USDDRPHY(Module, AutoCSR):
# Xilinx Ultrascale Plus DDR3/DDR4 PHY -------------------------------------------------------------
class USPDDRPHY(USDDRPHY):
def __init__(self, pads, cmd_latency=1, **kwargs):
USDDRPHY.__init__(self, pads, cmd_latency=cmd_latency, **kwargs)
def __init__(self, pads, **kwargs):
USDDRPHY.__init__(self, pads, **kwargs)

View File

@ -25,6 +25,7 @@
#define SDRAM_PHY_RDPHASE 0
#define SDRAM_PHY_WRPHASE 1
#define SDRAM_PHY_WRITE_LEVELING_CAPABLE
#define SDRAM_PHY_WRITE_LATENCY_CALIBRATION_CAPABLE
#define SDRAM_PHY_READ_LEVELING_CAPABLE
#define SDRAM_PHY_MODULES DFII_PIX_DATA_BYTES/2
#define SDRAM_PHY_DELAYS 32

View File

@ -26,6 +26,7 @@
#define SDRAM_PHY_WRPHASE 2
#define SDRAM_PHY_WRITE_LEVELING_CAPABLE
#define SDRAM_PHY_WRITE_LEVELING_REINIT
#define SDRAM_PHY_WRITE_LATENCY_CALIBRATION_CAPABLE
#define SDRAM_PHY_READ_LEVELING_CAPABLE
#define SDRAM_PHY_MODULES DFII_PIX_DATA_BYTES/2
#define SDRAM_PHY_DELAYS 512