phy: simplify/improve dqs preamble/postamble.

Add some FIXMEs on ECP5DDRPHY.
This commit is contained in:
Florent Kermarrec 2020-04-17 19:50:15 +02:00
parent eaf0691908
commit 48c2fc2cad
4 changed files with 63 additions and 46 deletions

View File

@ -108,7 +108,7 @@ class BitSlip(Module):
# DQS Pattern -------------------------------------------------------------------------------------- # DQS Pattern --------------------------------------------------------------------------------------
class DQSPattern(Module): class DQSPattern(Module):
def __init__(self, preamble=None, postamble=None, wlevel_en=0, wlevel_strobe=0): def __init__(self, preamble=None, postamble=None, wlevel_en=0, wlevel_strobe=0, register=False):
self.preamble = Signal() if preamble is None else preamble self.preamble = Signal() if preamble is None else preamble
self.postamble = Signal() if postamble is None else postamble self.postamble = Signal() if postamble is None else postamble
self.o = Signal(8) self.o = Signal(8)
@ -117,8 +117,11 @@ class DQSPattern(Module):
self.comb += [ self.comb += [
self.o.eq(0b01010101), self.o.eq(0b01010101),
If(self.preamble | self.postamble, If(self.preamble,
self.o.eq(0b0000000) self.o.eq(0b00010101)
),
If(self.postamble,
self.o.eq(0b01010100)
), ),
If(wlevel_en, If(wlevel_en,
self.o.eq(0b00000000), self.o.eq(0b00000000),
@ -127,6 +130,10 @@ class DQSPattern(Module):
) )
) )
] ]
if register:
o = Signal.like(self.o)
self.sync += o.eq(self.o)
self.o = o
# Settings ----------------------------------------------------------------------------------------- # Settings -----------------------------------------------------------------------------------------

View File

@ -141,7 +141,6 @@ class ECP5DDRPHY(Module, AutoCSR):
bl8_chunk = Signal() bl8_chunk = Signal()
rddata_en = Signal(self.settings.read_latency) rddata_en = Signal(self.settings.read_latency)
wrdata_en = Signal(cwl_sys_latency + 4)
# Iterate on pads groups ------------------------------------------------------------------- # Iterate on pads groups -------------------------------------------------------------------
for pads_group in range(len(pads.groups)): for pads_group in range(len(pads.groups)):
@ -309,7 +308,7 @@ class ECP5DDRPHY(Module, AutoCSR):
dm_bl8_cases = {} dm_bl8_cases = {}
dm_bl8_cases[0] = dm_o_data_muxed.eq(dm_o_data[:4]) dm_bl8_cases[0] = dm_o_data_muxed.eq(dm_o_data[:4])
dm_bl8_cases[1] = dm_o_data_muxed.eq(dm_o_data_d[4:]) dm_bl8_cases[1] = dm_o_data_muxed.eq(dm_o_data_d[4:])
self.sync += Case(bl8_chunk, dm_bl8_cases) self.sync += Case(bl8_chunk, dm_bl8_cases) # FIXME: use self.comb?
self.specials += Instance("ODDRX2DQA", self.specials += Instance("ODDRX2DQA",
i_RST = ResetSignal("sys2x"), i_RST = ResetSignal("sys2x"),
i_ECLK = ClockSignal("sys2x"), i_ECLK = ClockSignal("sys2x"),
@ -330,10 +329,10 @@ class ECP5DDRPHY(Module, AutoCSR):
i_ECLK = ClockSignal("sys2x"), i_ECLK = ClockSignal("sys2x"),
i_SCLK = ClockSignal(), i_SCLK = ClockSignal(),
i_DQSW = dqsw, i_DQSW = dqsw,
i_D0 = dqs_pattern.o[3], i_D0 = 0, # FIXME: dqs_pattern.o[3],
i_D1 = dqs_pattern.o[2], i_D1 = 1, # FIXME: dqs_pattern.o[2],
i_D2 = dqs_pattern.o[1], i_D2 = 0, # FIXME: dqs_pattern.o[1],
i_D3 = dqs_pattern.o[0], i_D3 = 1, # FIXME: dqs_pattern.o[0],
o_Q = dqs o_Q = dqs
), ),
Instance("TSHX2DQSA", Instance("TSHX2DQSA",
@ -341,8 +340,8 @@ class ECP5DDRPHY(Module, AutoCSR):
i_ECLK = ClockSignal("sys2x"), i_ECLK = ClockSignal("sys2x"),
i_SCLK = ClockSignal(), i_SCLK = ClockSignal(),
i_DQSW = dqsw, i_DQSW = dqsw,
i_T0 = ~dqs_oe, i_T0 = ~(dqs_pattern.preamble | dqs_oe | dqs_pattern.postamble),
i_T1 = ~dqs_oe, i_T1 = ~(dqs_pattern.preamble | dqs_oe | dqs_pattern.postamble),
o_Q = dqs_oe_n o_Q = dqs_oe_n
), ),
Tristate(pads.dqs_p[i], dqs, ~dqs_oe_n, dqs_i) Tristate(pads.dqs_p[i], dqs, ~dqs_oe_n, dqs_i)
@ -372,7 +371,7 @@ class ECP5DDRPHY(Module, AutoCSR):
dq_bl8_cases = {} dq_bl8_cases = {}
dq_bl8_cases[0] = dq_o_data_muxed.eq(dq_o_data[:4]) dq_bl8_cases[0] = dq_o_data_muxed.eq(dq_o_data[:4])
dq_bl8_cases[1] = dq_o_data_muxed.eq(dq_o_data_d[4:]) dq_bl8_cases[1] = dq_o_data_muxed.eq(dq_o_data_d[4:])
self.sync += Case(bl8_chunk, dq_bl8_cases) self.sync += Case(bl8_chunk, dq_bl8_cases) # FIXME: use self.comb?
_dq_i_data = Signal(4) _dq_i_data = Signal(4)
self.specials += [ self.specials += [
Instance("ODDRX2DQA", Instance("ODDRX2DQA",
@ -430,8 +429,8 @@ class ECP5DDRPHY(Module, AutoCSR):
i_ECLK = ClockSignal("sys2x"), i_ECLK = ClockSignal("sys2x"),
i_SCLK = ClockSignal(), i_SCLK = ClockSignal(),
i_DQSW270 = dqsw270, i_DQSW270 = dqsw270,
i_T0 = ~dq_oe, i_T0 = ~(dqs_pattern.preamble | dq_oe | dqs_pattern.postamble),
i_T1 = ~dq_oe, i_T1 = ~(dqs_pattern.preamble | dq_oe | dqs_pattern.postamble),
o_Q = dq_oe_n, o_Q = dq_oe_n,
), ),
Tristate(pads.dq[j], dq_o, ~dq_oe_n, dq_i) Tristate(pads.dq[j], dq_o, ~dq_oe_n, dq_i)
@ -458,16 +457,18 @@ class ECP5DDRPHY(Module, AutoCSR):
# interface: The PHY is operating in halfrate mode (so it provides 4 data words every sys_clk cycle: # interface: The PHY is operating in halfrate mode (so it provides 4 data words every sys_clk cycle:
# 2x for DDR, 2x for halfrate) but DDR3 requires a burst of 8 data words (BL8) for best efficiency. # 2x for DDR, 2x for halfrate) but DDR3 requires a burst of 8 data words (BL8) for best efficiency.
# Writes are then performed in 2 sys_clk cycles and data needs to be selected for each cycle. # Writes are then performed in 2 sys_clk cycles and data needs to be selected for each cycle.
# The DQ/DQS tristates are controlled for 4 sys_clk cycles: Write (2) + Pre/Postamble (2). # FIXME: understand +2
wrdata_en = Signal(cwl_sys_latency + 5)
wrdata_en_last = Signal.like(wrdata_en) wrdata_en_last = Signal.like(wrdata_en)
self.comb += wrdata_en.eq(Cat(dfi.phases[self.settings.wrphase].wrdata_en, wrdata_en_last)) self.comb += wrdata_en.eq(Cat(dfi.phases[self.settings.wrphase].wrdata_en, wrdata_en_last))
self.sync += wrdata_en_last.eq(wrdata_en) self.sync += wrdata_en_last.eq(wrdata_en)
self.sync += dq_oe.eq(wrdata_en[cwl_sys_latency:] != 0b0000) self.comb += dq_oe.eq(wrdata_en[cwl_sys_latency + 2] | wrdata_en[cwl_sys_latency + 3])
self.sync += bl8_chunk.eq(wrdata_en[cwl_sys_latency]) self.comb += bl8_chunk.eq(wrdata_en[cwl_sys_latency + 1])
self.comb += dqs_oe.eq(dq_oe) self.comb += dqs_oe.eq(dq_oe)
# Write DQS Postamble/Preamble Control Path ------------------------------------------------ # Write DQS Postamble/Preamble Control Path ------------------------------------------------
# Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last # Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last
# write. # write. During writes, DQS tristate is configured as output for at least 4 sys_clk cycles:
self.sync += dqs_pattern.preamble.eq( wrdata_en[cwl_sys_latency-3:-2-3] == 0b10) # FIXME: why -3? # 1 for Preamble, 2 for the Write and 1 for the Postamble.
self.sync += dqs_pattern.postamble.eq(wrdata_en[cwl_sys_latency-3+2:-3] == 0b01) # FIXME: why -3? self.comb += dqs_pattern.preamble.eq( wrdata_en[cwl_sys_latency + 1] & ~wrdata_en[cwl_sys_latency + 2])
self.comb += dqs_pattern.postamble.eq(wrdata_en[cwl_sys_latency + 4] & ~wrdata_en[cwl_sys_latency + 3])

View File

@ -25,7 +25,7 @@ class S7DDRPHY(Module, AutoCSR):
iodelay_clk_freq = 200e6, iodelay_clk_freq = 200e6,
cmd_latency = 0, cmd_latency = 0,
interface_type = "NETWORKING"): interface_type = "NETWORKING"):
assert not (memtype == "DDR3" and nphases == 2) # FIXME: Needs BL8 support for nphases=2 assert not (memtype == "DDR3" and nphases == 2)
assert interface_type in ["NETWORKING", "MEMORY"] assert interface_type in ["NETWORKING", "MEMORY"]
assert not (interface_type == "MEMORY" and nphases == 2) assert not (interface_type == "MEMORY" and nphases == 2)
phytype = self.__class__.__name__ phytype = self.__class__.__name__
@ -285,11 +285,14 @@ class S7DDRPHY(Module, AutoCSR):
) )
# DQS and DM ------------------------------------------------------------------------------- # DQS and DM -------------------------------------------------------------------------------
dqs_oe = Signal() dqs_oe = Signal()
dqs_pattern = DQSPattern( dqs_oe_delayed = Signal() # Tristate control is asynchronous, needs to be delayed.
dqs_pattern = DQSPattern(
wlevel_en = self._wlevel_en.storage, wlevel_en = self._wlevel_en.storage,
wlevel_strobe = self._wlevel_strobe.re) wlevel_strobe = self._wlevel_strobe.re,
register = not with_odelay)
self.submodules += dqs_pattern self.submodules += dqs_pattern
self.sync += dqs_oe_delayed.eq(dqs_pattern.preamble | dqs_oe | dqs_pattern.postamble)
for i in range(databits//8): for i in range(databits//8):
dm_o_nodelay = Signal() dm_o_nodelay = Signal()
self.specials += Instance("OSERDESE2", self.specials += Instance("OSERDESE2",
@ -358,7 +361,7 @@ class S7DDRPHY(Module, AutoCSR):
o_OFB = dqs_o_no_delay if with_odelay else Signal(), o_OFB = dqs_o_no_delay if with_odelay else Signal(),
o_OQ = Signal() if with_odelay else dqs_o_no_delay, o_OQ = Signal() if with_odelay else dqs_o_no_delay,
i_TCE = 1, i_TCE = 1,
i_T1 = ~dqs_oe, i_T1 = ~dqs_oe_delayed,
o_TQ = dqs_t, o_TQ = dqs_t,
) )
if with_odelay: if with_odelay:
@ -402,7 +405,9 @@ class S7DDRPHY(Module, AutoCSR):
) )
# DQ --------------------------------------------------------------------------------------- # DQ ---------------------------------------------------------------------------------------
dq_oe = Signal() dq_oe = Signal()
dq_oe_delayed = Signal() # Tristate control is asynchronous, needs to be delayed.
self.sync += dq_oe_delayed.eq(dqs_pattern.preamble | dq_oe | dqs_pattern.postamble)
for i in range(databits): for i in range(databits):
dq_o_nodelay = Signal() dq_o_nodelay = Signal()
dq_o_delayed = Signal() dq_o_delayed = Signal()
@ -429,7 +434,7 @@ class S7DDRPHY(Module, AutoCSR):
i_D7 = dfi.phases[3].wrdata[i], i_D7 = dfi.phases[3].wrdata[i],
i_D8 = dfi.phases[3].wrdata[databits+i], i_D8 = dfi.phases[3].wrdata[databits+i],
i_TCE = 1, i_TCE = 1,
i_T1 = ~dq_oe, i_T1 = ~dq_oe_delayed,
o_TQ = dq_t, o_TQ = dq_t,
i_OCE = 1, i_OCE = 1,
o_OQ = dq_o_nodelay, o_OQ = dq_o_nodelay,
@ -565,20 +570,20 @@ class S7DDRPHY(Module, AutoCSR):
# Write Control Path ----------------------------------------------------------------------- # Write Control Path -----------------------------------------------------------------------
# Creates a shift register of write commands coming from the DFI interface. This shift register # Creates a shift register of write commands coming from the DFI interface. This shift register
# is used to control DQ/DQS tristates. The DQ/DQS tristates are controlled for 3 sys_clk cycles: # is used to control DQ/DQS tristates.
# Write (1) + Pre/Postamble (2). wrdata_en = Signal(cwl_sys_latency + 2)
wrdata_en = Signal(cwl_sys_latency + 3)
wrdata_en_last = Signal.like(wrdata_en) wrdata_en_last = Signal.like(wrdata_en)
self.comb += wrdata_en.eq(Cat(dfi.phases[self.settings.wrphase].wrdata_en, wrdata_en_last)) self.comb += wrdata_en.eq(Cat(dfi.phases[self.settings.wrphase].wrdata_en, wrdata_en_last))
self.sync += wrdata_en_last.eq(wrdata_en) self.sync += wrdata_en_last.eq(wrdata_en)
self.sync += dq_oe.eq(wrdata_en[cwl_sys_latency:] != 0b000) self.comb += dq_oe.eq(wrdata_en[cwl_sys_latency])
self.comb += If(self._wlevel_en.storage, dqs_oe.eq(1)).Else(dqs_oe.eq(dq_oe)) self.comb += If(self._wlevel_en.storage, dqs_oe.eq(1)).Else(dqs_oe.eq(dq_oe))
# Write DQS Postamble/Preamble Control Path ------------------------------------------------ # Write DQS Postamble/Preamble Control Path ------------------------------------------------
# Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last # Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last
# write. # write. During writes, DQS tristate is configured as output for at least 3 sys_clk cycles:
self.sync += dqs_pattern.preamble.eq( wrdata_en[cwl_sys_latency:-1] == 0b10) # 1 for Preamble, 1 for the Write and 1 for the Postamble.
self.sync += dqs_pattern.postamble.eq(wrdata_en[cwl_sys_latency+1:] == 0b01) self.comb += dqs_pattern.preamble.eq( wrdata_en[cwl_sys_latency - 1] & ~wrdata_en[cwl_sys_latency])
self.comb += dqs_pattern.postamble.eq(wrdata_en[cwl_sys_latency + 1] & ~wrdata_en[cwl_sys_latency])
# Xilinx Virtex7 (S7DDRPHY with odelay) ------------------------------------------------------------ # Xilinx Virtex7 (S7DDRPHY with odelay) ------------------------------------------------------------

View File

@ -273,11 +273,13 @@ class USDDRPHY(Module, AutoCSR):
self.comb += pads.ten.eq(0) self.comb += pads.ten.eq(0)
# DQS and DM ------------------------------------------------------------------------------- # DQS and DM -------------------------------------------------------------------------------
dqs_oe = Signal() dqs_oe = Signal()
dqs_pattern = DQSPattern( dqs_oe_delayed = Signal() # Tristate control is asynchronous, needs to be delayed.
dqs_pattern = DQSPattern(
wlevel_en = self._wlevel_en.storage, wlevel_en = self._wlevel_en.storage,
wlevel_strobe = self._wlevel_strobe.re) wlevel_strobe = self._wlevel_strobe.re)
self.submodules += dqs_pattern self.submodules += dqs_pattern
self.sync += dqs_oe_delayed.eq(dqs_pattern.preamble | dqs_oe | dqs_pattern.postamble)
for i in range(databits//8): for i in range(databits//8):
dm_o_nodelay = Signal() dm_o_nodelay = Signal()
self.specials += [ self.specials += [
@ -344,7 +346,7 @@ class USDDRPHY(Module, AutoCSR):
i_RST = ResetSignal(), i_RST = ResetSignal(),
i_CLK = ClockSignal("sys4x"), i_CLK = ClockSignal("sys4x"),
i_CLKDIV = ClockSignal(), i_CLKDIV = ClockSignal(),
i_T = ~dqs_oe, i_T = ~dqs_oe_delayed,
i_D = Cat( i_D = Cat(
dqs_pattern.o[0], dqs_pattern.o[1], dqs_pattern.o[0], dqs_pattern.o[1],
dqs_pattern.o[2], dqs_pattern.o[3], dqs_pattern.o[2], dqs_pattern.o[3],
@ -382,7 +384,9 @@ class USDDRPHY(Module, AutoCSR):
] ]
# DQ --------------------------------------------------------------------------------------- # DQ ---------------------------------------------------------------------------------------
dq_oe = Signal() dq_oe = Signal()
dq_oe_delayed = Signal() # Tristate control is asynchronous, needs to be delayed.
self.sync += dq_oe_delayed.eq(dqs_pattern.preamble | dq_oe | dqs_pattern.postamble)
for i in range(databits): for i in range(databits):
dq_o_nodelay = Signal() dq_o_nodelay = Signal()
dq_o_delayed = Signal() dq_o_delayed = Signal()
@ -409,7 +413,7 @@ class USDDRPHY(Module, AutoCSR):
dfi.phases[1].wrdata[i], dfi.phases[1].wrdata[databits+i], dfi.phases[1].wrdata[i], dfi.phases[1].wrdata[databits+i],
dfi.phases[2].wrdata[i], dfi.phases[2].wrdata[databits+i], dfi.phases[2].wrdata[i], dfi.phases[2].wrdata[databits+i],
dfi.phases[3].wrdata[i], dfi.phases[3].wrdata[databits+i]), dfi.phases[3].wrdata[i], dfi.phases[3].wrdata[databits+i]),
i_T = ~dq_oe, i_T = ~dq_oe_delayed,
o_OQ = dq_o_nodelay, o_OQ = dq_o_nodelay,
o_T_OUT = dq_t, o_T_OUT = dq_t,
), ),
@ -496,20 +500,20 @@ class USDDRPHY(Module, AutoCSR):
# Write Control Path ----------------------------------------------------------------------- # Write Control Path -----------------------------------------------------------------------
# Creates a shift register of write commands coming from the DFI interface. This shift register # Creates a shift register of write commands coming from the DFI interface. This shift register
# is used to control DQ/DQS tristates. The DQ/DQS tristates are controlled for 3 sys_clk cycles: # is used to control DQ/DQS tristates.
# Write (1) + Pre/Postamble (2). wrdata_en = Signal(cwl_sys_latency + 2)
wrdata_en = Signal(cwl_sys_latency + 3)
wrdata_en_last = Signal.like(wrdata_en) wrdata_en_last = Signal.like(wrdata_en)
self.comb += wrdata_en.eq(Cat(dfi.phases[self.settings.wrphase].wrdata_en, wrdata_en_last)) self.comb += wrdata_en.eq(Cat(dfi.phases[self.settings.wrphase].wrdata_en, wrdata_en_last))
self.sync += wrdata_en_last.eq(wrdata_en) self.sync += wrdata_en_last.eq(wrdata_en)
self.sync += dq_oe.eq(wrdata_en[cwl_sys_latency:] != 0b000) self.comb += dq_oe.eq(wrdata_en[cwl_sys_latency])
self.comb += If(self._wlevel_en.storage, dqs_oe.eq(1)).Else(dqs_oe.eq(dq_oe)) self.comb += If(self._wlevel_en.storage, dqs_oe.eq(1)).Else(dqs_oe.eq(dq_oe))
# Write DQS Postamble/Preamble Control Path ------------------------------------------------ # Write DQS Postamble/Preamble Control Path ------------------------------------------------
# Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last # Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last
# write. # write. During writes, DQS tristate is configured as output for at least 3 sys_clk cycles:
self.sync += dqs_pattern.preamble.eq( wrdata_en[cwl_sys_latency:-1] == 0b10) # 1 for Preamble, 1 for the Write and 1 for the Postamble.
self.sync += dqs_pattern.postamble.eq(wrdata_en[cwl_sys_latency+1:] == 0b01) self.comb += dqs_pattern.preamble.eq( wrdata_en[cwl_sys_latency - 1] & ~wrdata_en[cwl_sys_latency])
self.comb += dqs_pattern.postamble.eq(wrdata_en[cwl_sys_latency + 1] & ~wrdata_en[cwl_sys_latency])
# Xilinx Ultrascale Plus DDR3/DDR4 PHY ------------------------------------------------------------- # Xilinx Ultrascale Plus DDR3/DDR4 PHY -------------------------------------------------------------