diff --git a/litedram/common.py b/litedram/common.py index 48aaae0..b5d7eab 100644 --- a/litedram/common.py +++ b/litedram/common.py @@ -108,7 +108,7 @@ class BitSlip(Module): # DQS Pattern -------------------------------------------------------------------------------------- class DQSPattern(Module): - def __init__(self, preamble=None, postamble=None, wlevel_en=0, wlevel_strobe=0): + def __init__(self, preamble=None, postamble=None, wlevel_en=0, wlevel_strobe=0, register=False): self.preamble = Signal() if preamble is None else preamble self.postamble = Signal() if postamble is None else postamble self.o = Signal(8) @@ -117,8 +117,11 @@ class DQSPattern(Module): self.comb += [ self.o.eq(0b01010101), - If(self.preamble | self.postamble, - self.o.eq(0b0000000) + If(self.preamble, + self.o.eq(0b00010101) + ), + If(self.postamble, + self.o.eq(0b01010100) ), If(wlevel_en, self.o.eq(0b00000000), @@ -127,6 +130,10 @@ class DQSPattern(Module): ) ) ] + if register: + o = Signal.like(self.o) + self.sync += o.eq(self.o) + self.o = o # Settings ----------------------------------------------------------------------------------------- diff --git a/litedram/phy/ecp5ddrphy.py b/litedram/phy/ecp5ddrphy.py index b94a6ca..4454c4a 100644 --- a/litedram/phy/ecp5ddrphy.py +++ b/litedram/phy/ecp5ddrphy.py @@ -141,7 +141,6 @@ class ECP5DDRPHY(Module, AutoCSR): bl8_chunk = Signal() rddata_en = Signal(self.settings.read_latency) - wrdata_en = Signal(cwl_sys_latency + 4) # Iterate on pads groups ------------------------------------------------------------------- for pads_group in range(len(pads.groups)): @@ -309,7 +308,7 @@ class ECP5DDRPHY(Module, AutoCSR): dm_bl8_cases = {} dm_bl8_cases[0] = dm_o_data_muxed.eq(dm_o_data[:4]) dm_bl8_cases[1] = dm_o_data_muxed.eq(dm_o_data_d[4:]) - self.sync += Case(bl8_chunk, dm_bl8_cases) + self.sync += Case(bl8_chunk, dm_bl8_cases) # FIXME: use self.comb? self.specials += Instance("ODDRX2DQA", i_RST = ResetSignal("sys2x"), i_ECLK = ClockSignal("sys2x"), @@ -330,10 +329,10 @@ class ECP5DDRPHY(Module, AutoCSR): i_ECLK = ClockSignal("sys2x"), i_SCLK = ClockSignal(), i_DQSW = dqsw, - i_D0 = dqs_pattern.o[3], - i_D1 = dqs_pattern.o[2], - i_D2 = dqs_pattern.o[1], - i_D3 = dqs_pattern.o[0], + i_D0 = 0, # FIXME: dqs_pattern.o[3], + i_D1 = 1, # FIXME: dqs_pattern.o[2], + i_D2 = 0, # FIXME: dqs_pattern.o[1], + i_D3 = 1, # FIXME: dqs_pattern.o[0], o_Q = dqs ), Instance("TSHX2DQSA", @@ -341,8 +340,8 @@ class ECP5DDRPHY(Module, AutoCSR): i_ECLK = ClockSignal("sys2x"), i_SCLK = ClockSignal(), i_DQSW = dqsw, - i_T0 = ~dqs_oe, - i_T1 = ~dqs_oe, + i_T0 = ~(dqs_pattern.preamble | dqs_oe | dqs_pattern.postamble), + i_T1 = ~(dqs_pattern.preamble | dqs_oe | dqs_pattern.postamble), o_Q = dqs_oe_n ), Tristate(pads.dqs_p[i], dqs, ~dqs_oe_n, dqs_i) @@ -372,7 +371,7 @@ class ECP5DDRPHY(Module, AutoCSR): dq_bl8_cases = {} dq_bl8_cases[0] = dq_o_data_muxed.eq(dq_o_data[:4]) dq_bl8_cases[1] = dq_o_data_muxed.eq(dq_o_data_d[4:]) - self.sync += Case(bl8_chunk, dq_bl8_cases) + self.sync += Case(bl8_chunk, dq_bl8_cases) # FIXME: use self.comb? _dq_i_data = Signal(4) self.specials += [ Instance("ODDRX2DQA", @@ -430,8 +429,8 @@ class ECP5DDRPHY(Module, AutoCSR): i_ECLK = ClockSignal("sys2x"), i_SCLK = ClockSignal(), i_DQSW270 = dqsw270, - i_T0 = ~dq_oe, - i_T1 = ~dq_oe, + i_T0 = ~(dqs_pattern.preamble | dq_oe | dqs_pattern.postamble), + i_T1 = ~(dqs_pattern.preamble | dq_oe | dqs_pattern.postamble), o_Q = dq_oe_n, ), Tristate(pads.dq[j], dq_o, ~dq_oe_n, dq_i) @@ -458,16 +457,18 @@ class ECP5DDRPHY(Module, AutoCSR): # interface: The PHY is operating in halfrate mode (so provide 4 datas every sys_clk cycles: # 2x for DDR, 2x for halfrate) but DDR3 requires a burst of 8 datas (BL8) for best efficiency. # Writes are then performed in 2 sys_clk cycles and data needs to be selected for each cycle. - # The DQ/DQS tristates are controlled for 4 sys_clk cycles: Write (2) + Pre/Postamble (2). + # FIXME: understand +2 + wrdata_en = Signal(cwl_sys_latency + 5) wrdata_en_last = Signal.like(wrdata_en) self.comb += wrdata_en.eq(Cat(dfi.phases[self.settings.wrphase].wrdata_en, wrdata_en_last)) self.sync += wrdata_en_last.eq(wrdata_en) - self.sync += dq_oe.eq(wrdata_en[cwl_sys_latency:] != 0b0000) - self.sync += bl8_chunk.eq(wrdata_en[cwl_sys_latency]) + self.comb += dq_oe.eq(wrdata_en[cwl_sys_latency + 2] | wrdata_en[cwl_sys_latency + 3]) + self.comb += bl8_chunk.eq(wrdata_en[cwl_sys_latency + 1]) self.comb += dqs_oe.eq(dq_oe) # Write DQS Postamble/Preamble Control Path ------------------------------------------------ # Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last - # write. - self.sync += dqs_pattern.preamble.eq( wrdata_en[cwl_sys_latency-3:-2-3] == 0b10) # FIXME: why -3? - self.sync += dqs_pattern.postamble.eq(wrdata_en[cwl_sys_latency-3+2:-3] == 0b01) # FIXME: why -3? + # write. During writes, DQS tristate is configured as output for at least 4 sys_clk cycles: + # 1 for Preamble, 2 for the Write and 1 for the Postamble. + self.comb += dqs_pattern.preamble.eq( wrdata_en[cwl_sys_latency + 1] & ~wrdata_en[cwl_sys_latency + 2]) + self.comb += dqs_pattern.postamble.eq(wrdata_en[cwl_sys_latency + 4] & ~wrdata_en[cwl_sys_latency + 3]) diff --git a/litedram/phy/s7ddrphy.py b/litedram/phy/s7ddrphy.py index c6275cf..c86cde7 100644 --- a/litedram/phy/s7ddrphy.py +++ b/litedram/phy/s7ddrphy.py @@ -25,7 +25,7 @@ class S7DDRPHY(Module, AutoCSR): iodelay_clk_freq = 200e6, cmd_latency = 0, interface_type = "NETWORKING"): - assert not (memtype == "DDR3" and nphases == 2) # FIXME: Needs BL8 support for nphases=2 + assert not (memtype == "DDR3" and nphases == 2) assert interface_type in ["NETWORKING", "MEMORY"] assert not (interface_type == "MEMORY" and nphases == 2) phytype = self.__class__.__name__ @@ -285,11 +285,14 @@ class S7DDRPHY(Module, AutoCSR): ) # DQS and DM ------------------------------------------------------------------------------- - dqs_oe = Signal() - dqs_pattern = DQSPattern( + dqs_oe = Signal() + dqs_oe_delayed = Signal() # Tristate control is asynchronous, needs to be delayed. + dqs_pattern = DQSPattern( wlevel_en = self._wlevel_en.storage, - wlevel_strobe = self._wlevel_strobe.re) + wlevel_strobe = self._wlevel_strobe.re, + register = not with_odelay) self.submodules += dqs_pattern + self.sync += dqs_oe_delayed.eq(dqs_pattern.preamble | dqs_oe | dqs_pattern.postamble) for i in range(databits//8): dm_o_nodelay = Signal() self.specials += Instance("OSERDESE2", @@ -358,7 +361,7 @@ class S7DDRPHY(Module, AutoCSR): o_OFB = dqs_o_no_delay if with_odelay else Signal(), o_OQ = Signal() if with_odelay else dqs_o_no_delay, i_TCE = 1, - i_T1 = ~dqs_oe, + i_T1 = ~dqs_oe_delayed, o_TQ = dqs_t, ) if with_odelay: @@ -402,7 +405,9 @@ class S7DDRPHY(Module, AutoCSR): ) # DQ --------------------------------------------------------------------------------------- - dq_oe = Signal() + dq_oe = Signal() + dq_oe_delayed = Signal() # Tristate control is asynchronous, needs to be delayed. + self.sync += dq_oe_delayed.eq(dqs_pattern.preamble | dq_oe | dqs_pattern.postamble) for i in range(databits): dq_o_nodelay = Signal() dq_o_delayed = Signal() @@ -429,7 +434,7 @@ class S7DDRPHY(Module, AutoCSR): i_D7 = dfi.phases[3].wrdata[i], i_D8 = dfi.phases[3].wrdata[databits+i], i_TCE = 1, - i_T1 = ~dq_oe, + i_T1 = ~dq_oe_delayed, o_TQ = dq_t, i_OCE = 1, o_OQ = dq_o_nodelay, @@ -565,20 +570,20 @@ class S7DDRPHY(Module, AutoCSR): # Write Control Path ----------------------------------------------------------------------- # Creates a shift register of write commands coming from the DFI interface. This shift register - # is used to control DQ/DQS tristates. The DQ/DQS tristates are controlled for 3 sys_clk cycles: - # Write (1) + Pre/Postamble (2). - wrdata_en = Signal(cwl_sys_latency + 3) + # is used to control DQ/DQS tristates. + wrdata_en = Signal(cwl_sys_latency + 2) wrdata_en_last = Signal.like(wrdata_en) self.comb += wrdata_en.eq(Cat(dfi.phases[self.settings.wrphase].wrdata_en, wrdata_en_last)) self.sync += wrdata_en_last.eq(wrdata_en) - self.sync += dq_oe.eq(wrdata_en[cwl_sys_latency:] != 0b000) + self.comb += dq_oe.eq(wrdata_en[cwl_sys_latency]) self.comb += If(self._wlevel_en.storage, dqs_oe.eq(1)).Else(dqs_oe.eq(dq_oe)) # Write DQS Postamble/Preamble Control Path ------------------------------------------------ # Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last - # write. - self.sync += dqs_pattern.preamble.eq( wrdata_en[cwl_sys_latency:-1] == 0b10) - self.sync += dqs_pattern.postamble.eq(wrdata_en[cwl_sys_latency+1:] == 0b01) + # write. During writes, DQS tristate is configured as output for at least 3 sys_clk cycles: + # 1 for Preamble, 1 for the Write and 1 for the Postamble. + self.comb += dqs_pattern.preamble.eq( wrdata_en[cwl_sys_latency - 1] & ~wrdata_en[cwl_sys_latency]) + self.comb += dqs_pattern.postamble.eq(wrdata_en[cwl_sys_latency + 1] & ~wrdata_en[cwl_sys_latency]) # Xilinx Virtex7 (S7DDRPHY with odelay) ------------------------------------------------------------ diff --git a/litedram/phy/usddrphy.py b/litedram/phy/usddrphy.py index df35e1e..3d345c4 100644 --- a/litedram/phy/usddrphy.py +++ b/litedram/phy/usddrphy.py @@ -273,11 +273,13 @@ class USDDRPHY(Module, AutoCSR): self.comb += pads.ten.eq(0) # DQS and DM ------------------------------------------------------------------------------- - dqs_oe = Signal() - dqs_pattern = DQSPattern( + dqs_oe = Signal() + dqs_oe_delayed = Signal() # Tristate control is asynchronous, needs to be delayed. + dqs_pattern = DQSPattern( wlevel_en = self._wlevel_en.storage, wlevel_strobe = self._wlevel_strobe.re) self.submodules += dqs_pattern + self.sync += dqs_oe_delayed.eq(dqs_pattern.preamble | dqs_oe | dqs_pattern.postamble) for i in range(databits//8): dm_o_nodelay = Signal() self.specials += [ @@ -344,7 +346,7 @@ class USDDRPHY(Module, AutoCSR): i_RST = ResetSignal(), i_CLK = ClockSignal("sys4x"), i_CLKDIV = ClockSignal(), - i_T = ~dqs_oe, + i_T = ~dqs_oe_delayed, i_D = Cat( dqs_pattern.o[0], dqs_pattern.o[1], dqs_pattern.o[2], dqs_pattern.o[3], @@ -382,7 +384,9 @@ class USDDRPHY(Module, AutoCSR): ] # DQ --------------------------------------------------------------------------------------- - dq_oe = Signal() + dq_oe = Signal() + dq_oe_delayed = Signal() # Tristate control is asynchronous, needs to be delayed. + self.sync += dq_oe_delayed.eq(dqs_pattern.preamble | dq_oe | dqs_pattern.postamble) for i in range(databits): dq_o_nodelay = Signal() dq_o_delayed = Signal() @@ -409,7 +413,7 @@ class USDDRPHY(Module, AutoCSR): dfi.phases[1].wrdata[i], dfi.phases[1].wrdata[databits+i], dfi.phases[2].wrdata[i], dfi.phases[2].wrdata[databits+i], dfi.phases[3].wrdata[i], dfi.phases[3].wrdata[databits+i]), - i_T = ~dq_oe, + i_T = ~dq_oe_delayed, o_OQ = dq_o_nodelay, o_T_OUT = dq_t, ), @@ -496,20 +500,20 @@ class USDDRPHY(Module, AutoCSR): # Write Control Path ----------------------------------------------------------------------- # Creates a shift register of write commands coming from the DFI interface. This shift register - # is used to control DQ/DQS tristates. The DQ/DQS tristates are controlled for 3 sys_clk cycles: - # Write (1) + Pre/Postamble (2). - wrdata_en = Signal(cwl_sys_latency + 3) + # is used to control DQ/DQS tristates. + wrdata_en = Signal(cwl_sys_latency + 2) wrdata_en_last = Signal.like(wrdata_en) self.comb += wrdata_en.eq(Cat(dfi.phases[self.settings.wrphase].wrdata_en, wrdata_en_last)) self.sync += wrdata_en_last.eq(wrdata_en) - self.sync += dq_oe.eq(wrdata_en[cwl_sys_latency:] != 0b000) + self.comb += dq_oe.eq(wrdata_en[cwl_sys_latency]) self.comb += If(self._wlevel_en.storage, dqs_oe.eq(1)).Else(dqs_oe.eq(dq_oe)) # Write DQS Postamble/Preamble Control Path ------------------------------------------------ # Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last - # write. - self.sync += dqs_pattern.preamble.eq( wrdata_en[cwl_sys_latency:-1] == 0b10) - self.sync += dqs_pattern.postamble.eq(wrdata_en[cwl_sys_latency+1:] == 0b01) + # write. During writes, DQS tristate is configured as output for at least 3 sys_clk cycles: + # 1 for Preamble, 1 for the Write and 1 for the Postamble. + self.comb += dqs_pattern.preamble.eq( wrdata_en[cwl_sys_latency - 1] & ~wrdata_en[cwl_sys_latency]) + self.comb += dqs_pattern.postamble.eq(wrdata_en[cwl_sys_latency + 1] & ~wrdata_en[cwl_sys_latency]) # Xilinx Ultrascale Plus DDR3/DDR4 PHY -------------------------------------------------------------