phy/ecp5/s7/usddrphy: simplify dq/dqs tristate using TappedDelayLine.

This commit is contained in:
Florent Kermarrec 2020-10-01 19:56:12 +02:00
parent e3461704b5
commit 6a23bd623b
4 changed files with 59 additions and 42 deletions

View File

@ -139,10 +139,11 @@ class BitSlip(Module):
# TappedDelayLine ---------------------------------------------------------------------------------- # TappedDelayLine ----------------------------------------------------------------------------------
class TappedDelayLine(Module): class TappedDelayLine(Module):
def __init__(self, signal, ntaps): def __init__(self, signal=None, ntaps=1):
self.taps = Array(signal if i == 0 else Signal.like(signal) for i in range(ntaps)) self.input = Signal() if signal is None else signal
for i in range(1, ntaps): self.taps = Array(Signal.like(self.input) for i in range(ntaps))
self.sync += self.taps[i].eq(self.taps[i-1]) for i in range(ntaps):
self.sync += self.taps[i].eq(self.input if i == 0 else self.taps[i-1])
self.output = self.taps[-1] self.output = self.taps[-1]
# DQS Pattern -------------------------------------------------------------------------------------- # DQS Pattern --------------------------------------------------------------------------------------

View File

@ -362,6 +362,8 @@ class ECP5DDRPHY(Module, AutoCSR):
] ]
# Read Control Path ------------------------------------------------------------------------ # Read Control Path ------------------------------------------------------------------------
rdtap = cl_sys_latency
# Creates a delay line of read commands coming from the DFI interface. The taps are used to # Creates a delay line of read commands coming from the DFI interface. The taps are used to
# control DQS read (internal read pulse of the DQSBUF) and the output of the delay is used # control DQS read (internal read pulse of the DQSBUF) and the output of the delay is used
# to signal valid read data to the DFI interface. # to signal valid read data to the DFI interface.
@ -377,10 +379,12 @@ class ECP5DDRPHY(Module, AutoCSR):
) )
self.submodules += rddata_en self.submodules += rddata_en
self.sync += [phase.rddata_valid.eq(rddata_en.output) for phase in dfi.phases] self.comb += [phase.rddata_valid.eq(rddata_en.output) for phase in dfi.phases]
self.comb += dqs_re.eq(rddata_en.taps[cl_sys_latency + 1] | rddata_en.taps[cl_sys_latency + 2]) self.comb += dqs_re.eq(rddata_en.taps[rdtap] | rddata_en.taps[rdtap + 1])
# Write Control Path ----------------------------------------------------------------------- # Write Control Path -----------------------------------------------------------------------
wrtap = cwl_sys_latency
# Create a delay line of write commands coming from the DFI interface. These taps are used to # Create a delay line of write commands coming from the DFI interface. These taps are used to
# control DQ/DQS tristates and to select write data of the DRAM burst from the DFI interface. # control DQ/DQS tristates and to select write data of the DRAM burst from the DFI interface.
# The PHY is operating in halfrate mode (so it provides 4 data words every sys_clk cycle: 2x for DDR, # The PHY is operating in halfrate mode (so it provides 4 data words every sys_clk cycle: 2x for DDR,
@ -388,17 +392,17 @@ class ECP5DDRPHY(Module, AutoCSR):
# then performed in 2 sys_clk cycles and data needs to be selected for each cycle. # then performed in 2 sys_clk cycles and data needs to be selected for each cycle.
wrdata_en = TappedDelayLine( wrdata_en = TappedDelayLine(
signal = reduce(or_, [dfi.phases[i].wrdata_en for i in range(nphases)]), signal = reduce(or_, [dfi.phases[i].wrdata_en for i in range(nphases)]),
ntaps = cwl_sys_latency + 4 ntaps = wrtap + 4
) )
self.submodules += wrdata_en self.submodules += wrdata_en
self.comb += dq_oe.eq(wrdata_en.taps[cwl_sys_latency + 1] | wrdata_en.taps[cwl_sys_latency + 2]) self.comb += dq_oe.eq(wrdata_en.taps[wrtap] | wrdata_en.taps[wrtap + 1])
self.comb += bl8_chunk.eq(wrdata_en.taps[cwl_sys_latency + 1]) self.comb += bl8_chunk.eq(wrdata_en.taps[wrtap])
self.comb += dqs_oe.eq(dq_oe) self.comb += dqs_oe.eq(dq_oe)
# Write DQS Postamble/Preamble Control Path ------------------------------------------------ # Write DQS Postamble/Preamble Control Path ------------------------------------------------
# Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last # Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last
# write. During writes, DQS tristate is configured as output for at least 4 sys_clk cycles: # write. During writes, DQS tristate is configured as output for at least 4 sys_clk cycles:
# 1 for Preamble, 2 for the Write and 1 for the Postamble. # 1 for Preamble, 2 for the Write and 1 for the Postamble.
self.comb += dqs_preamble.eq( wrdata_en.taps[cwl_sys_latency + 0] & ~wrdata_en.taps[cwl_sys_latency + 1]) self.comb += dqs_preamble.eq( wrdata_en.taps[wrtap - 1] & ~wrdata_en.taps[wrtap + 0])
self.comb += dqs_postamble.eq(wrdata_en.taps[cwl_sys_latency + 3] & ~wrdata_en.taps[cwl_sys_latency + 2]) self.comb += dqs_postamble.eq(wrdata_en.taps[wrtap + 2] & ~wrdata_en.taps[wrtap + 1])

View File

@ -203,14 +203,18 @@ class S7DDRPHY(Module, AutoCSR):
) )
# DQS -------------------------------------------------------------------------------------- # DQS --------------------------------------------------------------------------------------
dqs_oe = Signal() dqs_oe = Signal()
dqs_oe_delayed = Signal() # Tristate control is asynchronous, needs to be delayed. dqs_preamble = Signal()
dqs_pattern = DQSPattern( dqs_postamble = Signal()
dqs_oe_delay = TappedDelayLine(ntaps=1)
dqs_pattern = DQSPattern(
preamble = dqs_preamble,
postamble = dqs_postamble,
wlevel_en = self._wlevel_en.storage, wlevel_en = self._wlevel_en.storage,
wlevel_strobe = self._wlevel_strobe.re, wlevel_strobe = self._wlevel_strobe.re,
register = not with_odelay) register = not with_odelay)
self.submodules += dqs_pattern self.submodules += dqs_oe_delay, dqs_pattern
self.sync += dqs_oe_delayed.eq(dqs_pattern.preamble | dqs_oe | dqs_pattern.postamble) self.comb += dqs_oe_delay.input.eq(dqs_preamble | dqs_oe | dqs_postamble)
for i in range(databits//8): for i in range(databits//8):
dqs_o_no_delay = Signal() dqs_o_no_delay = Signal()
dqs_o_delayed = Signal() dqs_o_delayed = Signal()
@ -229,7 +233,7 @@ class S7DDRPHY(Module, AutoCSR):
o_OFB = dqs_o_no_delay if with_odelay else Signal(), o_OFB = dqs_o_no_delay if with_odelay else Signal(),
o_OQ = Signal() if with_odelay else dqs_o_no_delay, o_OQ = Signal() if with_odelay else dqs_o_no_delay,
i_TCE = 1, i_TCE = 1,
i_T1 = ~dqs_oe_delayed, i_T1 = ~dqs_oe_delay.output,
o_TQ = dqs_t, o_TQ = dqs_t,
) )
if with_odelay: if with_odelay:
@ -293,9 +297,10 @@ class S7DDRPHY(Module, AutoCSR):
) )
# DQ --------------------------------------------------------------------------------------- # DQ ---------------------------------------------------------------------------------------
dq_oe = Signal() dq_oe = Signal()
dq_oe_delayed = Signal() # Tristate control is asynchronous, needs to be delayed. dq_oe_delay = TappedDelayLine(ntaps=1)
self.sync += dq_oe_delayed.eq(dqs_pattern.preamble | dq_oe | dqs_pattern.postamble) self.submodules += dq_oe_delay
self.comb += dq_oe_delay.input.eq(dqs_preamble | dq_oe | dqs_postamble)
for i in range(databits): for i in range(databits):
dq_o_nodelay = Signal() dq_o_nodelay = Signal()
dq_o_delayed = Signal() dq_o_delayed = Signal()
@ -315,7 +320,7 @@ class S7DDRPHY(Module, AutoCSR):
i_CLKDIV = ClockSignal(), i_CLKDIV = ClockSignal(),
**{f"i_D{n+1}": dfi.phases[n//2].wrdata[n%2*databits+i] for n in range(8)}, **{f"i_D{n+1}": dfi.phases[n//2].wrdata[n%2*databits+i] for n in range(8)},
i_TCE = 1, i_TCE = 1,
i_T1 = ~dq_oe_delayed, i_T1 = ~dq_oe_delay.output,
o_TQ = dq_t, o_TQ = dq_t,
i_OCE = 1, i_OCE = 1,
o_OQ = dq_o_nodelay, o_OQ = dq_o_nodelay,
@ -402,28 +407,28 @@ class S7DDRPHY(Module, AutoCSR):
) )
self.submodules += rddata_en self.submodules += rddata_en
self.sync += [phase.rddata_valid.eq(rddata_en.output | self._wlevel_en.storage) for phase in dfi.phases] self.comb += [phase.rddata_valid.eq(rddata_en.output | self._wlevel_en.storage) for phase in dfi.phases]
# Write Control Path ----------------------------------------------------------------------- # Write Control Path -----------------------------------------------------------------------
dq_latency = cwl_sys_latency wrtap = cwl_sys_latency - 1
# Create a delay line of write commands coming from the DFI interface. These taps are used to # Create a delay line of write commands coming from the DFI interface. These taps are used to
# control DQ/DQS tristates. # control DQ/DQS tristates.
wrdata_en = TappedDelayLine( wrdata_en = TappedDelayLine(
signal = reduce(or_, [dfi.phases[i].wrdata_en for i in range(nphases)]), signal = reduce(or_, [dfi.phases[i].wrdata_en for i in range(nphases)]),
ntaps = dq_latency + 2 ntaps = wrtap + 2
) )
self.submodules += wrdata_en self.submodules += wrdata_en
self.comb += dq_oe.eq(wrdata_en.taps[cwl_sys_latency]) self.comb += dq_oe.eq(wrdata_en.taps[wrtap])
self.comb += If(self._wlevel_en.storage, dqs_oe.eq(1)).Else(dqs_oe.eq(dq_oe)) self.comb += If(self._wlevel_en.storage, dqs_oe.eq(1)).Else(dqs_oe.eq(dq_oe))
# Write DQS Postamble/Preamble Control Path ------------------------------------------------ # Write DQS Postamble/Preamble Control Path ------------------------------------------------
# Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last # Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last
# write. During writes, DQS tristate is configured as output for at least 3 sys_clk cycles: # write. During writes, DQS tristate is configured as output for at least 3 sys_clk cycles:
# 1 for Preamble, 1 for the Write and 1 for the Postamble. # 1 for Preamble, 1 for the Write and 1 for the Postamble.
self.comb += dqs_pattern.preamble.eq( wrdata_en.taps[dq_latency - 1] & ~wrdata_en.taps[dq_latency]) self.comb += dqs_preamble.eq( wrdata_en.taps[wrtap - 1] & ~wrdata_en.taps[wrtap + 0])
self.comb += dqs_pattern.postamble.eq(wrdata_en.taps[dq_latency + 1] & ~wrdata_en.taps[dq_latency]) self.comb += dqs_postamble.eq(wrdata_en.taps[wrtap + 1] & ~wrdata_en.taps[wrtap + 0])
# Xilinx Virtex7 (S7DDRPHY with odelay) ------------------------------------------------------------ # Xilinx Virtex7 (S7DDRPHY with odelay) ------------------------------------------------------------

View File

@ -223,13 +223,17 @@ class USDDRPHY(Module, AutoCSR):
self.comb += pads.ten.eq(0) self.comb += pads.ten.eq(0)
# DQS -------------------------------------------------------------------------------------- # DQS --------------------------------------------------------------------------------------
dqs_oe = Signal() dqs_oe = Signal()
dqs_oe_delayed = Signal() # Tristate control is asynchronous, needs to be delayed. dqs_preamble = Signal()
dqs_pattern = DQSPattern( dqs_postamble = Signal()
dqs_oe_delay = TappedDelayLine(ntaps=1)
dqs_pattern = DQSPattern(
preamble = dqs_preamble,
postamble = dqs_postamble,
wlevel_en = self._wlevel_en.storage, wlevel_en = self._wlevel_en.storage,
wlevel_strobe = self._wlevel_strobe.re) wlevel_strobe = self._wlevel_strobe.re)
self.submodules += dqs_pattern self.submodules += dqs_oe_delay, dqs_pattern
self.sync += dqs_oe_delayed.eq(dqs_pattern.preamble | dqs_oe | dqs_pattern.postamble) self.comb += dqs_oe_delay.input.eq(dqs_preamble | dqs_oe | dqs_postamble)
for i in range(databits//8): for i in range(databits//8):
if i == 0: if i == 0:
# Store initial DQS DELAY_VALUE (in taps) to be able to reload DELAY_VALUE after reset. # Store initial DQS DELAY_VALUE (in taps) to be able to reload DELAY_VALUE after reset.
@ -262,7 +266,7 @@ class USDDRPHY(Module, AutoCSR):
i_RST = ResetSignal() | self._rst.storage, i_RST = ResetSignal() | self._rst.storage,
i_CLK = ClockSignal("sys4x"), i_CLK = ClockSignal("sys4x"),
i_CLKDIV = ClockSignal(), i_CLKDIV = ClockSignal(),
i_T = ~dqs_oe_delayed, i_T = ~dqs_oe_delay.output,
i_D = Cat(*[dqs_pattern.o[n] for n in range(8)]), i_D = Cat(*[dqs_pattern.o[n] for n in range(8)]),
o_OQ = dqs_nodelay, o_OQ = dqs_nodelay,
o_T_OUT = dqs_t, o_T_OUT = dqs_t,
@ -334,9 +338,10 @@ class USDDRPHY(Module, AutoCSR):
] ]
# DQ --------------------------------------------------------------------------------------- # DQ ---------------------------------------------------------------------------------------
dq_oe = Signal() dq_oe = Signal()
dq_oe_delayed = Signal() # Tristate control is asynchronous, needs to be delayed. dq_oe_delay = TappedDelayLine(ntaps=1)
self.sync += dq_oe_delayed.eq(dqs_pattern.preamble | dq_oe | dqs_pattern.postamble) self.submodules += dq_oe_delay
self.comb += dq_oe_delay.input.eq(dqs_preamble | dq_oe | dqs_postamble)
for i in range(databits): for i in range(databits):
dq_o_nodelay = Signal() dq_o_nodelay = Signal()
dq_o_delayed = Signal() dq_o_delayed = Signal()
@ -360,7 +365,7 @@ class USDDRPHY(Module, AutoCSR):
i_CLK = ClockSignal("sys4x"), i_CLK = ClockSignal("sys4x"),
i_CLKDIV = ClockSignal(), i_CLKDIV = ClockSignal(),
i_D = Cat(*[dfi.phases[n//2].wrdata[n%2*databits+i] for n in range(8)]), i_D = Cat(*[dfi.phases[n//2].wrdata[n%2*databits+i] for n in range(8)]),
i_T = ~dq_oe_delayed, i_T = ~dq_oe_delay.output,
o_OQ = dq_o_nodelay, o_OQ = dq_o_nodelay,
o_T_OUT = dq_t, o_T_OUT = dq_t,
), ),
@ -436,26 +441,28 @@ class USDDRPHY(Module, AutoCSR):
) )
self.submodules += rddata_en self.submodules += rddata_en
self.sync += [phase.rddata_valid.eq(rddata_en.output | self._wlevel_en.storage) for phase in dfi.phases] self.comb += [phase.rddata_valid.eq(rddata_en.output | self._wlevel_en.storage) for phase in dfi.phases]
# Write Control Path ----------------------------------------------------------------------- # Write Control Path -----------------------------------------------------------------------
wrtap = cwl_sys_latency - 1
# Create a delay line of write commands coming from the DFI interface. These taps are used to # Create a delay line of write commands coming from the DFI interface. These taps are used to
# control DQ/DQS tristates. # control DQ/DQS tristates.
wrdata_en = TappedDelayLine( wrdata_en = TappedDelayLine(
signal = reduce(or_, [dfi.phases[i].wrdata_en for i in range(nphases)]), signal = reduce(or_, [dfi.phases[i].wrdata_en for i in range(nphases)]),
ntaps = cwl_sys_latency + 2 ntaps = wrtap + 2
) )
self.submodules += wrdata_en self.submodules += wrdata_en
self.comb += dq_oe.eq(wrdata_en.taps[cwl_sys_latency]) self.comb += dq_oe.eq(wrdata_en.taps[wrtap])
self.comb += If(self._wlevel_en.storage, dqs_oe.eq(1)).Else(dqs_oe.eq(dq_oe)) self.comb += If(self._wlevel_en.storage, dqs_oe.eq(1)).Else(dqs_oe.eq(dq_oe))
# Write DQS Postamble/Preamble Control Path ------------------------------------------------ # Write DQS Postamble/Preamble Control Path ------------------------------------------------
# Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last # Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last
# write. During writes, DQS tristate is configured as output for at least 3 sys_clk cycles: # write. During writes, DQS tristate is configured as output for at least 3 sys_clk cycles:
# 1 for Preamble, 1 for the Write and 1 for the Postamble. # 1 for Preamble, 1 for the Write and 1 for the Postamble.
self.comb += dqs_pattern.preamble.eq( wrdata_en.taps[cwl_sys_latency - 1] & ~wrdata_en.taps[cwl_sys_latency]) self.comb += dqs_preamble.eq( wrdata_en.taps[wrtap - 1] & ~wrdata_en.taps[wrtap + 0])
self.comb += dqs_pattern.postamble.eq(wrdata_en.taps[cwl_sys_latency + 1] & ~wrdata_en.taps[cwl_sys_latency]) self.comb += dqs_postamble.eq(wrdata_en.taps[wrtap + 1] & ~wrdata_en.taps[wrtap + 0])
# Xilinx Ultrascale Plus DDR3/DDR4 PHY ------------------------------------------------------------- # Xilinx Ultrascale Plus DDR3/DDR4 PHY -------------------------------------------------------------