From 6a23bd623b3a6119f4c67073f1768724c0cef758 Mon Sep 17 00:00:00 2001 From: Florent Kermarrec Date: Thu, 1 Oct 2020 19:56:12 +0200 Subject: [PATCH] phy/ecp5/s7/usddrphy: simplify dq/dqs tristate using TappedDelayLine. --- litedram/common.py | 9 +++++---- litedram/phy/ecp5ddrphy.py | 18 +++++++++++------- litedram/phy/s7ddrphy.py | 37 +++++++++++++++++++++---------------- litedram/phy/usddrphy.py | 37 ++++++++++++++++++++++--------------- 4 files changed, 59 insertions(+), 42 deletions(-) diff --git a/litedram/common.py b/litedram/common.py index f835e0e..08a2b38 100644 --- a/litedram/common.py +++ b/litedram/common.py @@ -139,10 +139,11 @@ class BitSlip(Module): # TappedDelayLine ---------------------------------------------------------------------------------- class TappedDelayLine(Module): - def __init__(self, signal, ntaps): - self.taps = Array(signal if i == 0 else Signal.like(signal) for i in range(ntaps)) - for i in range(1, ntaps): - self.sync += self.taps[i].eq(self.taps[i-1]) + def __init__(self, signal=None, ntaps=1): + self.input = Signal() if signal is None else signal + self.taps = Array(Signal.like(self.input) for i in range(ntaps)) + for i in range(ntaps): + self.sync += self.taps[i].eq(self.input if i == 0 else self.taps[i-1]) self.output = self.taps[-1] # DQS Pattern -------------------------------------------------------------------------------------- diff --git a/litedram/phy/ecp5ddrphy.py b/litedram/phy/ecp5ddrphy.py index b1be838..939e213 100644 --- a/litedram/phy/ecp5ddrphy.py +++ b/litedram/phy/ecp5ddrphy.py @@ -362,6 +362,8 @@ class ECP5DDRPHY(Module, AutoCSR): ] # Read Control Path ------------------------------------------------------------------------ + rdtap = cl_sys_latency + # Creates a delay line of read commands coming from the DFI interface. The taps are used to # control DQS read (internal read pulse of the DQSBUF) and the output of the delay is used # signal a valid read data to the DFI interface. @@ -377,10 +379,12 @@ class ECP5DDRPHY(Module, AutoCSR): ) self.submodules += rddata_en - self.sync += [phase.rddata_valid.eq(rddata_en.output) for phase in dfi.phases] - self.comb += dqs_re.eq(rddata_en.taps[cl_sys_latency + 1] | rddata_en.taps[cl_sys_latency + 2]) + self.comb += [phase.rddata_valid.eq(rddata_en.output) for phase in dfi.phases] + self.comb += dqs_re.eq(rddata_en.taps[rdtap] | rddata_en.taps[rdtap + 1]) # Write Control Path ----------------------------------------------------------------------- + wrtap = cwl_sys_latency + # Create a delay line of write commands coming from the DFI interface. This taps are used to # control DQ/DQS tristates and to select write data of the DRAM burst from the DFI interface. # The PHY is operating in halfrate mode (so provide 4 datas every sys_clk cycles: 2x for DDR, @@ -388,17 +392,17 @@ class ECP5DDRPHY(Module, AutoCSR): # then performed in 2 sys_clk cycles and data needs to be selected for each cycle. wrdata_en = TappedDelayLine( signal = reduce(or_, [dfi.phases[i].wrdata_en for i in range(nphases)]), - ntaps = cwl_sys_latency + 4 + ntaps = wrtap + 4 ) self.submodules += wrdata_en - self.comb += dq_oe.eq(wrdata_en.taps[cwl_sys_latency + 1] | wrdata_en.taps[cwl_sys_latency + 2]) - self.comb += bl8_chunk.eq(wrdata_en.taps[cwl_sys_latency + 1]) + self.comb += dq_oe.eq(wrdata_en.taps[wrtap] | wrdata_en.taps[wrtap + 1]) + self.comb += bl8_chunk.eq(wrdata_en.taps[wrtap]) self.comb += dqs_oe.eq(dq_oe) # Write DQS Postamble/Preamble Control Path ------------------------------------------------ # Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last # write. During writes, DQS tristate is configured as output for at least 4 sys_clk cycles: # 1 for Preamble, 2 for the Write and 1 for the Postamble. - self.comb += dqs_preamble.eq( wrdata_en.taps[cwl_sys_latency + 0] & ~wrdata_en.taps[cwl_sys_latency + 1]) - self.comb += dqs_postamble.eq(wrdata_en.taps[cwl_sys_latency + 3] & ~wrdata_en.taps[cwl_sys_latency + 2]) + self.comb += dqs_preamble.eq( wrdata_en.taps[wrtap - 1] & ~wrdata_en.taps[wrtap + 0]) + self.comb += dqs_postamble.eq(wrdata_en.taps[wrtap + 2] & ~wrdata_en.taps[wrtap + 1]) diff --git a/litedram/phy/s7ddrphy.py b/litedram/phy/s7ddrphy.py index d4594cf..b492959 100644 --- a/litedram/phy/s7ddrphy.py +++ b/litedram/phy/s7ddrphy.py @@ -203,14 +203,18 @@ class S7DDRPHY(Module, AutoCSR): ) # DQS -------------------------------------------------------------------------------------- - dqs_oe = Signal() - dqs_oe_delayed = Signal() # Tristate control is asynchronous, needs to be delayed. - dqs_pattern = DQSPattern( + dqs_oe = Signal() + dqs_preamble = Signal() + dqs_postamble = Signal() + dqs_oe_delay = TappedDelayLine(ntaps=1) + dqs_pattern = DQSPattern( + preamble = dqs_preamble, + postamble = dqs_postamble, wlevel_en = self._wlevel_en.storage, wlevel_strobe = self._wlevel_strobe.re, register = not with_odelay) - self.submodules += dqs_pattern - self.sync += dqs_oe_delayed.eq(dqs_pattern.preamble | dqs_oe | dqs_pattern.postamble) + self.submodules += dqs_oe_delay, dqs_pattern + self.comb += dqs_oe_delay.input.eq(dqs_preamble | dqs_oe | dqs_postamble) for i in range(databits//8): dqs_o_no_delay = Signal() dqs_o_delayed = Signal() @@ -229,7 +233,7 @@ class S7DDRPHY(Module, AutoCSR): o_OFB = dqs_o_no_delay if with_odelay else Signal(), o_OQ = Signal() if with_odelay else dqs_o_no_delay, i_TCE = 1, - i_T1 = ~dqs_oe_delayed, + i_T1 = ~dqs_oe_delay.output, o_TQ = dqs_t, ) if with_odelay: @@ -293,9 +297,10 @@ class S7DDRPHY(Module, AutoCSR): ) # DQ --------------------------------------------------------------------------------------- - dq_oe = Signal() - dq_oe_delayed = Signal() # Tristate control is asynchronous, needs to be delayed. - self.sync += dq_oe_delayed.eq(dqs_pattern.preamble | dq_oe | dqs_pattern.postamble) + dq_oe = Signal() + dq_oe_delay = TappedDelayLine(ntaps=1) + self.submodules += dq_oe_delay + self.comb += dq_oe_delay.input.eq(dqs_preamble | dq_oe | dqs_postamble) for i in range(databits): dq_o_nodelay = Signal() dq_o_delayed = Signal() @@ -315,7 +320,7 @@ class S7DDRPHY(Module, AutoCSR): i_CLKDIV = ClockSignal(), **{f"i_D{n+1}": dfi.phases[n//2].wrdata[n%2*databits+i] for n in range(8)}, i_TCE = 1, - i_T1 = ~dq_oe_delayed, + i_T1 = ~dq_oe_delay.output, o_TQ = dq_t, i_OCE = 1, o_OQ = dq_o_nodelay, @@ -402,28 +407,28 @@ class S7DDRPHY(Module, AutoCSR): ) self.submodules += rddata_en - self.sync += [phase.rddata_valid.eq(rddata_en.output | self._wlevel_en.storage) for phase in dfi.phases] + self.comb += [phase.rddata_valid.eq(rddata_en.output | self._wlevel_en.storage) for phase in dfi.phases] # Write Control Path ----------------------------------------------------------------------- - dq_latency = cwl_sys_latency + wrtap = cwl_sys_latency - 1 # Create a delay line of write commands coming from the DFI interface. This taps are used to # control DQ/DQS tristates. wrdata_en = TappedDelayLine( signal = reduce(or_, [dfi.phases[i].wrdata_en for i in range(nphases)]), - ntaps = dq_latency + 2 + ntaps = wrtap + 2 ) self.submodules += wrdata_en - self.comb += dq_oe.eq(wrdata_en.taps[cwl_sys_latency]) + self.comb += dq_oe.eq(wrdata_en.taps[wrtap]) self.comb += If(self._wlevel_en.storage, dqs_oe.eq(1)).Else(dqs_oe.eq(dq_oe)) # Write DQS Postamble/Preamble Control Path ------------------------------------------------ # Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last # write. During writes, DQS tristate is configured as output for at least 3 sys_clk cycles: # 1 for Preamble, 1 for the Write and 1 for the Postamble. - self.comb += dqs_pattern.preamble.eq( wrdata_en.taps[dq_latency - 1] & ~wrdata_en.taps[dq_latency]) - self.comb += dqs_pattern.postamble.eq(wrdata_en.taps[dq_latency + 1] & ~wrdata_en.taps[dq_latency]) + self.comb += dqs_preamble.eq( wrdata_en.taps[wrtap - 1] & ~wrdata_en.taps[wrtap + 0]) + self.comb += dqs_postamble.eq(wrdata_en.taps[wrtap + 1] & ~wrdata_en.taps[wrtap + 0]) # Xilinx Virtex7 (S7DDRPHY with odelay) ------------------------------------------------------------ diff --git a/litedram/phy/usddrphy.py b/litedram/phy/usddrphy.py index 6e52a3c..e1ee2c9 100644 --- a/litedram/phy/usddrphy.py +++ b/litedram/phy/usddrphy.py @@ -223,13 +223,17 @@ class USDDRPHY(Module, AutoCSR): self.comb += pads.ten.eq(0) # DQS -------------------------------------------------------------------------------------- - dqs_oe = Signal() - dqs_oe_delayed = Signal() # Tristate control is asynchronous, needs to be delayed. - dqs_pattern = DQSPattern( + dqs_oe = Signal() + dqs_preamble = Signal() + dqs_postamble = Signal() + dqs_oe_delay = TappedDelayLine(ntaps=1) + dqs_pattern = DQSPattern( + preamble = dqs_preamble, + postamble = dqs_postamble, wlevel_en = self._wlevel_en.storage, wlevel_strobe = self._wlevel_strobe.re) - self.submodules += dqs_pattern - self.sync += dqs_oe_delayed.eq(dqs_pattern.preamble | dqs_oe | dqs_pattern.postamble) + self.submodules += dqs_oe_delay, dqs_pattern + self.comb += dqs_oe_delay.input.eq(dqs_preamble | dqs_oe | dqs_postamble) for i in range(databits//8): if i == 0: # Store initial DQS DELAY_VALUE (in taps) to be able to reload DELAY_VALUE after reset. @@ -262,7 +266,7 @@ class USDDRPHY(Module, AutoCSR): i_RST = ResetSignal() | self._rst.storage, i_CLK = ClockSignal("sys4x"), i_CLKDIV = ClockSignal(), - i_T = ~dqs_oe_delayed, + i_T = ~dqs_oe_delay.output, i_D = Cat(*[dqs_pattern.o[n] for n in range(8)]), o_OQ = dqs_nodelay, o_T_OUT = dqs_t, @@ -334,9 +338,10 @@ class USDDRPHY(Module, AutoCSR): ] # DQ --------------------------------------------------------------------------------------- - dq_oe = Signal() - dq_oe_delayed = Signal() # Tristate control is asynchronous, needs to be delayed. - self.sync += dq_oe_delayed.eq(dqs_pattern.preamble | dq_oe | dqs_pattern.postamble) + dq_oe = Signal() + dq_oe_delay = TappedDelayLine(ntaps=1) + self.submodules += dq_oe_delay + self.comb += dq_oe_delay.input.eq(dqs_preamble | dq_oe | dqs_postamble) for i in range(databits): dq_o_nodelay = Signal() dq_o_delayed = Signal() @@ -360,7 +365,7 @@ class USDDRPHY(Module, AutoCSR): i_CLK = ClockSignal("sys4x"), i_CLKDIV = ClockSignal(), i_D = Cat(*[dfi.phases[n//2].wrdata[n%2*databits+i] for n in range(8)]), - i_T = ~dq_oe_delayed, + i_T = ~dq_oe_delay.output, o_OQ = dq_o_nodelay, o_T_OUT = dq_t, ), @@ -436,26 +441,28 @@ class USDDRPHY(Module, AutoCSR): ) self.submodules += rddata_en - self.sync += [phase.rddata_valid.eq(rddata_en.output | self._wlevel_en.storage) for phase in dfi.phases] + self.comb += [phase.rddata_valid.eq(rddata_en.output | self._wlevel_en.storage) for phase in dfi.phases] # Write Control Path ----------------------------------------------------------------------- + wrtap = cwl_sys_latency - 1 + # Create a delay line of write commands coming from the DFI interface. This taps are used to # control DQ/DQS tristates. wrdata_en = TappedDelayLine( signal = reduce(or_, [dfi.phases[i].wrdata_en for i in range(nphases)]), - ntaps = cwl_sys_latency + 2 + ntaps = wrtap + 2 ) self.submodules += wrdata_en - self.comb += dq_oe.eq(wrdata_en.taps[cwl_sys_latency]) + self.comb += dq_oe.eq(wrdata_en.taps[wrtap]) self.comb += If(self._wlevel_en.storage, dqs_oe.eq(1)).Else(dqs_oe.eq(dq_oe)) # Write DQS Postamble/Preamble Control Path ------------------------------------------------ # Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last # write. During writes, DQS tristate is configured as output for at least 3 sys_clk cycles: # 1 for Preamble, 1 for the Write and 1 for the Postamble. - self.comb += dqs_pattern.preamble.eq( wrdata_en.taps[cwl_sys_latency - 1] & ~wrdata_en.taps[cwl_sys_latency]) - self.comb += dqs_pattern.postamble.eq(wrdata_en.taps[cwl_sys_latency + 1] & ~wrdata_en.taps[cwl_sys_latency]) + self.comb += dqs_preamble.eq( wrdata_en.taps[wrtap - 1] & ~wrdata_en.taps[wrtap + 0]) + self.comb += dqs_postamble.eq(wrdata_en.taps[wrtap + 1] & ~wrdata_en.taps[wrtap + 0]) # Xilinx Ultrascale Plus DDR3/DDR4 PHY -------------------------------------------------------------