phy/ecp5/s7/usddrphy: simplify dq/dqs tristate using TappedDelayLine.

This commit is contained in:
Florent Kermarrec 2020-10-01 19:56:12 +02:00
parent e3461704b5
commit 6a23bd623b
4 changed files with 59 additions and 42 deletions

View File

@ -139,10 +139,11 @@ class BitSlip(Module):
# TappedDelayLine ----------------------------------------------------------------------------------
class TappedDelayLine(Module):
def __init__(self, signal, ntaps):
self.taps = Array(signal if i == 0 else Signal.like(signal) for i in range(ntaps))
for i in range(1, ntaps):
self.sync += self.taps[i].eq(self.taps[i-1])
def __init__(self, signal=None, ntaps=1):
self.input = Signal() if signal is None else signal
self.taps = Array(Signal.like(self.input) for i in range(ntaps))
for i in range(ntaps):
self.sync += self.taps[i].eq(self.input if i == 0 else self.taps[i-1])
self.output = self.taps[-1]
# DQS Pattern --------------------------------------------------------------------------------------

View File

@ -362,6 +362,8 @@ class ECP5DDRPHY(Module, AutoCSR):
]
# Read Control Path ------------------------------------------------------------------------
rdtap = cl_sys_latency
# Creates a delay line of read commands coming from the DFI interface. The taps are used to
# control DQS read (internal read pulse of the DQSBUF) and the output of the delay is used
# to signal valid read data to the DFI interface.
@ -377,10 +379,12 @@ class ECP5DDRPHY(Module, AutoCSR):
)
self.submodules += rddata_en
self.sync += [phase.rddata_valid.eq(rddata_en.output) for phase in dfi.phases]
self.comb += dqs_re.eq(rddata_en.taps[cl_sys_latency + 1] | rddata_en.taps[cl_sys_latency + 2])
self.comb += [phase.rddata_valid.eq(rddata_en.output) for phase in dfi.phases]
self.comb += dqs_re.eq(rddata_en.taps[rdtap] | rddata_en.taps[rdtap + 1])
# Write Control Path -----------------------------------------------------------------------
wrtap = cwl_sys_latency
# Create a delay line of write commands coming from the DFI interface. These taps are used to
# control DQ/DQS tristates and to select write data of the DRAM burst from the DFI interface.
# The PHY is operating in halfrate mode (so it provides 4 data items every sys_clk cycle: 2x for DDR,
@ -388,17 +392,17 @@ class ECP5DDRPHY(Module, AutoCSR):
# then performed in 2 sys_clk cycles and data needs to be selected for each cycle.
wrdata_en = TappedDelayLine(
signal = reduce(or_, [dfi.phases[i].wrdata_en for i in range(nphases)]),
ntaps = cwl_sys_latency + 4
ntaps = wrtap + 4
)
self.submodules += wrdata_en
self.comb += dq_oe.eq(wrdata_en.taps[cwl_sys_latency + 1] | wrdata_en.taps[cwl_sys_latency + 2])
self.comb += bl8_chunk.eq(wrdata_en.taps[cwl_sys_latency + 1])
self.comb += dq_oe.eq(wrdata_en.taps[wrtap] | wrdata_en.taps[wrtap + 1])
self.comb += bl8_chunk.eq(wrdata_en.taps[wrtap])
self.comb += dqs_oe.eq(dq_oe)
# Write DQS Postamble/Preamble Control Path ------------------------------------------------
# Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last
# write. During writes, DQS tristate is configured as output for at least 4 sys_clk cycles:
# 1 for Preamble, 2 for the Write and 1 for the Postamble.
self.comb += dqs_preamble.eq( wrdata_en.taps[cwl_sys_latency + 0] & ~wrdata_en.taps[cwl_sys_latency + 1])
self.comb += dqs_postamble.eq(wrdata_en.taps[cwl_sys_latency + 3] & ~wrdata_en.taps[cwl_sys_latency + 2])
self.comb += dqs_preamble.eq( wrdata_en.taps[wrtap - 1] & ~wrdata_en.taps[wrtap + 0])
self.comb += dqs_postamble.eq(wrdata_en.taps[wrtap + 2] & ~wrdata_en.taps[wrtap + 1])

View File

@ -204,13 +204,17 @@ class S7DDRPHY(Module, AutoCSR):
# DQS --------------------------------------------------------------------------------------
dqs_oe = Signal()
dqs_oe_delayed = Signal() # Tristate control is asynchronous, needs to be delayed.
dqs_preamble = Signal()
dqs_postamble = Signal()
dqs_oe_delay = TappedDelayLine(ntaps=1)
dqs_pattern = DQSPattern(
preamble = dqs_preamble,
postamble = dqs_postamble,
wlevel_en = self._wlevel_en.storage,
wlevel_strobe = self._wlevel_strobe.re,
register = not with_odelay)
self.submodules += dqs_pattern
self.sync += dqs_oe_delayed.eq(dqs_pattern.preamble | dqs_oe | dqs_pattern.postamble)
self.submodules += dqs_oe_delay, dqs_pattern
self.comb += dqs_oe_delay.input.eq(dqs_preamble | dqs_oe | dqs_postamble)
for i in range(databits//8):
dqs_o_no_delay = Signal()
dqs_o_delayed = Signal()
@ -229,7 +233,7 @@ class S7DDRPHY(Module, AutoCSR):
o_OFB = dqs_o_no_delay if with_odelay else Signal(),
o_OQ = Signal() if with_odelay else dqs_o_no_delay,
i_TCE = 1,
i_T1 = ~dqs_oe_delayed,
i_T1 = ~dqs_oe_delay.output,
o_TQ = dqs_t,
)
if with_odelay:
@ -294,8 +298,9 @@ class S7DDRPHY(Module, AutoCSR):
# DQ ---------------------------------------------------------------------------------------
dq_oe = Signal()
dq_oe_delayed = Signal() # Tristate control is asynchronous, needs to be delayed.
self.sync += dq_oe_delayed.eq(dqs_pattern.preamble | dq_oe | dqs_pattern.postamble)
dq_oe_delay = TappedDelayLine(ntaps=1)
self.submodules += dq_oe_delay
self.comb += dq_oe_delay.input.eq(dqs_preamble | dq_oe | dqs_postamble)
for i in range(databits):
dq_o_nodelay = Signal()
dq_o_delayed = Signal()
@ -315,7 +320,7 @@ class S7DDRPHY(Module, AutoCSR):
i_CLKDIV = ClockSignal(),
**{f"i_D{n+1}": dfi.phases[n//2].wrdata[n%2*databits+i] for n in range(8)},
i_TCE = 1,
i_T1 = ~dq_oe_delayed,
i_T1 = ~dq_oe_delay.output,
o_TQ = dq_t,
i_OCE = 1,
o_OQ = dq_o_nodelay,
@ -402,28 +407,28 @@ class S7DDRPHY(Module, AutoCSR):
)
self.submodules += rddata_en
self.sync += [phase.rddata_valid.eq(rddata_en.output | self._wlevel_en.storage) for phase in dfi.phases]
self.comb += [phase.rddata_valid.eq(rddata_en.output | self._wlevel_en.storage) for phase in dfi.phases]
# Write Control Path -----------------------------------------------------------------------
dq_latency = cwl_sys_latency
wrtap = cwl_sys_latency - 1
# Create a delay line of write commands coming from the DFI interface. These taps are used to
# control DQ/DQS tristates.
wrdata_en = TappedDelayLine(
signal = reduce(or_, [dfi.phases[i].wrdata_en for i in range(nphases)]),
ntaps = dq_latency + 2
ntaps = wrtap + 2
)
self.submodules += wrdata_en
self.comb += dq_oe.eq(wrdata_en.taps[cwl_sys_latency])
self.comb += dq_oe.eq(wrdata_en.taps[wrtap])
self.comb += If(self._wlevel_en.storage, dqs_oe.eq(1)).Else(dqs_oe.eq(dq_oe))
# Write DQS Postamble/Preamble Control Path ------------------------------------------------
# Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last
# write. During writes, DQS tristate is configured as output for at least 3 sys_clk cycles:
# 1 for Preamble, 1 for the Write and 1 for the Postamble.
self.comb += dqs_pattern.preamble.eq( wrdata_en.taps[dq_latency - 1] & ~wrdata_en.taps[dq_latency])
self.comb += dqs_pattern.postamble.eq(wrdata_en.taps[dq_latency + 1] & ~wrdata_en.taps[dq_latency])
self.comb += dqs_preamble.eq( wrdata_en.taps[wrtap - 1] & ~wrdata_en.taps[wrtap + 0])
self.comb += dqs_postamble.eq(wrdata_en.taps[wrtap + 1] & ~wrdata_en.taps[wrtap + 0])
# Xilinx Virtex7 (S7DDRPHY with odelay) ------------------------------------------------------------

View File

@ -224,12 +224,16 @@ class USDDRPHY(Module, AutoCSR):
# DQS --------------------------------------------------------------------------------------
dqs_oe = Signal()
dqs_oe_delayed = Signal() # Tristate control is asynchronous, needs to be delayed.
dqs_preamble = Signal()
dqs_postamble = Signal()
dqs_oe_delay = TappedDelayLine(ntaps=1)
dqs_pattern = DQSPattern(
preamble = dqs_preamble,
postamble = dqs_postamble,
wlevel_en = self._wlevel_en.storage,
wlevel_strobe = self._wlevel_strobe.re)
self.submodules += dqs_pattern
self.sync += dqs_oe_delayed.eq(dqs_pattern.preamble | dqs_oe | dqs_pattern.postamble)
self.submodules += dqs_oe_delay, dqs_pattern
self.comb += dqs_oe_delay.input.eq(dqs_preamble | dqs_oe | dqs_postamble)
for i in range(databits//8):
if i == 0:
# Store initial DQS DELAY_VALUE (in taps) to be able to reload DELAY_VALUE after reset.
@ -262,7 +266,7 @@ class USDDRPHY(Module, AutoCSR):
i_RST = ResetSignal() | self._rst.storage,
i_CLK = ClockSignal("sys4x"),
i_CLKDIV = ClockSignal(),
i_T = ~dqs_oe_delayed,
i_T = ~dqs_oe_delay.output,
i_D = Cat(*[dqs_pattern.o[n] for n in range(8)]),
o_OQ = dqs_nodelay,
o_T_OUT = dqs_t,
@ -335,8 +339,9 @@ class USDDRPHY(Module, AutoCSR):
# DQ ---------------------------------------------------------------------------------------
dq_oe = Signal()
dq_oe_delayed = Signal() # Tristate control is asynchronous, needs to be delayed.
self.sync += dq_oe_delayed.eq(dqs_pattern.preamble | dq_oe | dqs_pattern.postamble)
dq_oe_delay = TappedDelayLine(ntaps=1)
self.submodules += dq_oe_delay
self.comb += dq_oe_delay.input.eq(dqs_preamble | dq_oe | dqs_postamble)
for i in range(databits):
dq_o_nodelay = Signal()
dq_o_delayed = Signal()
@ -360,7 +365,7 @@ class USDDRPHY(Module, AutoCSR):
i_CLK = ClockSignal("sys4x"),
i_CLKDIV = ClockSignal(),
i_D = Cat(*[dfi.phases[n//2].wrdata[n%2*databits+i] for n in range(8)]),
i_T = ~dq_oe_delayed,
i_T = ~dq_oe_delay.output,
o_OQ = dq_o_nodelay,
o_T_OUT = dq_t,
),
@ -436,26 +441,28 @@ class USDDRPHY(Module, AutoCSR):
)
self.submodules += rddata_en
self.sync += [phase.rddata_valid.eq(rddata_en.output | self._wlevel_en.storage) for phase in dfi.phases]
self.comb += [phase.rddata_valid.eq(rddata_en.output | self._wlevel_en.storage) for phase in dfi.phases]
# Write Control Path -----------------------------------------------------------------------
wrtap = cwl_sys_latency - 1
# Create a delay line of write commands coming from the DFI interface. These taps are used to
# control DQ/DQS tristates.
wrdata_en = TappedDelayLine(
signal = reduce(or_, [dfi.phases[i].wrdata_en for i in range(nphases)]),
ntaps = cwl_sys_latency + 2
ntaps = wrtap + 2
)
self.submodules += wrdata_en
self.comb += dq_oe.eq(wrdata_en.taps[cwl_sys_latency])
self.comb += dq_oe.eq(wrdata_en.taps[wrtap])
self.comb += If(self._wlevel_en.storage, dqs_oe.eq(1)).Else(dqs_oe.eq(dq_oe))
# Write DQS Postamble/Preamble Control Path ------------------------------------------------
# Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last
# write. During writes, DQS tristate is configured as output for at least 3 sys_clk cycles:
# 1 for Preamble, 1 for the Write and 1 for the Postamble.
self.comb += dqs_pattern.preamble.eq( wrdata_en.taps[cwl_sys_latency - 1] & ~wrdata_en.taps[cwl_sys_latency])
self.comb += dqs_pattern.postamble.eq(wrdata_en.taps[cwl_sys_latency + 1] & ~wrdata_en.taps[cwl_sys_latency])
self.comb += dqs_preamble.eq( wrdata_en.taps[wrtap - 1] & ~wrdata_en.taps[wrtap + 0])
self.comb += dqs_postamble.eq(wrdata_en.taps[wrtap + 1] & ~wrdata_en.taps[wrtap + 0])
# Xilinx Ultrascale Plus DDR3/DDR4 PHY -------------------------------------------------------------