lpddr4/s7phy: improve to use the new DoubleRateLPDDR4PHY
This commit is contained in:
parent
2ab763ac5e
commit
4a5feb9e11
|
@ -230,7 +230,7 @@ class LPDDR4PHY(Module, AutoCSR):
|
|||
dw = 2*nphases,
|
||||
cycles = bitslip_cycles,
|
||||
rst = self.get_rst(bit//8, self._wdly_dq_bitslip_rst),
|
||||
slp = self.get_slp(bit//8, self._wdly_dq_bitslip),
|
||||
slp = self.get_inc(bit//8, self._wdly_dq_bitslip),
|
||||
i = Cat(*wrdata),
|
||||
o = self.out.dq_o[bit],
|
||||
)
|
||||
|
@ -241,7 +241,7 @@ class LPDDR4PHY(Module, AutoCSR):
|
|||
dw = 2*nphases,
|
||||
cycles = bitslip_cycles,
|
||||
rst = self.get_rst(bit//8, self._rdly_dq_bitslip_rst),
|
||||
slp = self.get_slp(bit//8, self._rdly_dq_bitslip),
|
||||
slp = self.get_inc(bit//8, self._rdly_dq_bitslip),
|
||||
i = self.out.dq_i[bit],
|
||||
o = dq_i_bs,
|
||||
)
|
||||
|
@ -268,7 +268,7 @@ class LPDDR4PHY(Module, AutoCSR):
|
|||
dw = 2*nphases,
|
||||
cycles = bitslip_cycles,
|
||||
rst = self.get_rst(byte, self._wdly_dq_bitslip_rst),
|
||||
slp = self.get_slp(byte, self._wdly_dq_bitslip),
|
||||
slp = self.get_inc(byte, self._wdly_dq_bitslip),
|
||||
i = dqs_pattern.o,
|
||||
o = self.out.dqs_o[byte],
|
||||
)
|
||||
|
@ -292,7 +292,7 @@ class LPDDR4PHY(Module, AutoCSR):
|
|||
dw = 2*nphases,
|
||||
cycles = bitslip_cycles,
|
||||
rst = self.get_rst(byte, self._wdly_dq_bitslip_rst),
|
||||
slp = self.get_slp(byte, self._wdly_dq_bitslip),
|
||||
slp = self.get_inc(byte, self._wdly_dq_bitslip),
|
||||
i = Cat(*wrdata_mask),
|
||||
o = self.out.dmi_o[byte],
|
||||
)
|
||||
|
@ -340,10 +340,12 @@ class LPDDR4PHY(Module, AutoCSR):
|
|||
self.comb += dqs_postamble.eq(wrdata_en_tap(wrtap + 1) & ~wrdata_en_tap(wrtap + 0))
|
||||
|
||||
def get_rst(self, byte, rst_csr):
|
||||
assert isinstance(rst_csr, CSR) and rst_csr.name.endswith("rst"), rst_csr
|
||||
return (self._dly_sel.storage[byte] & rst_csr.re) | self._rst.storage
|
||||
|
||||
def get_slp(self, byte, slp_csr):
|
||||
return self._dly_sel.storage[byte] & slp_csr.re
|
||||
def get_inc(self, byte, inc_csr):
|
||||
assert isinstance(inc_csr, CSR) and not inc_csr.name.endswith("rst"), inc_csr
|
||||
return self._dly_sel.storage[byte] & inc_csr.re
|
||||
|
||||
|
||||
class DoubleRateLPDDR4PHY(LPDDR4PHY):
|
||||
|
|
|
@ -5,125 +5,168 @@ from litex.soc.interconnect.csr import *
|
|||
from litedram.common import *
|
||||
from litedram.phy.dfi import *
|
||||
|
||||
from litedram.phy.lpddr4.utils import delayed
|
||||
from litedram.phy.lpddr4.basephy import LPDDR4PHY
|
||||
from litedram.phy.lpddr4.utils import delayed as delayed
|
||||
from litedram.phy.lpddr4.basephy import DoubleRateLPDDR4PHY, Latency
|
||||
|
||||
|
||||
# TODO: add option to avoid ODELAYE2, for now it won't work on Artix7
|
||||
class S7LPDDR4PHY(LPDDR4PHY):
|
||||
class S7LPDDR4PHY(DoubleRateLPDDR4PHY):
|
||||
def __init__(self, pads, *, iodelay_clk_freq, **kwargs):
|
||||
# TODO: add `with_odelay` argument to avoid ODELAYE2, currently it won't work on Artix7
|
||||
self.iodelay_clk_freq = iodelay_clk_freq
|
||||
|
||||
# DoubleRateLPDDR4PHY outputs half-width signals (compared to LPDDR4PHY) in the sys2x domain.
|
||||
# This allows us to use 8:1 DDR OSERDESE2/ISERDESE2 to (de-)serialize the data.
|
||||
_sys2x = 4
|
||||
super().__init__(pads,
|
||||
# TODO: verify
|
||||
write_ser_latency = 1, # OSERDESE2 8:1 DDR (4 full-rate clocks)
|
||||
read_des_latency = 2, # ISERDESE2 NETWORKING
|
||||
phytype = self.__class__.__name__,
|
||||
ser_latency = Latency(sys=0, sys8x=1*_sys2x), # OSERDESE2 8:1 DDR (4 full-rate clocks)
|
||||
des_latency = Latency(sys=2, sys8x=2*_sys2x), # ISERDESE2 NETWORKING
|
||||
phytype = self.__class__.__name__,
|
||||
**kwargs
|
||||
)
|
||||
|
||||
self.submodules.sys2x_delay = ClockDomainsRenamer("sys2x")(Module())
|
||||
|
||||
# Parameters -------------------------------------------------------------------------------
|
||||
iodelay_tap_average = {
|
||||
200e6: 78e-12,
|
||||
300e6: 52e-12,
|
||||
400e6: 39e-12, # Only valid for -3 and -2/2E speed grades
|
||||
}
|
||||
half_sys8x_taps = math.floor(self.tck/(4*iodelay_tap_average[iodelay_clk_freq]))
|
||||
# Calculate value of taps needed to shift a signal by 90 degrees.
|
||||
# Using iodelay_clk_freq of 300MHz/400MHz is only valid for -3 and -2/2E speed grades.
|
||||
# FIXME: this should be named sys16x, but using sys8x due to a name hard-coded in BIOS
|
||||
assert iodelay_clk_freq in [200e6, 300e6, 400e6]
|
||||
iodelay_tap_average = 1 / (2*32 * iodelay_clk_freq)
|
||||
half_sys8x_taps = math.floor(self.tck / (4 * iodelay_tap_average))
|
||||
|
||||
# Registers --------------------------------------------------------------------------------
|
||||
self._half_sys8x_taps = CSRStorage(5, reset=half_sys8x_taps)
|
||||
|
||||
# odelay control
|
||||
self._cdly_rst = CSR()
|
||||
self._cdly_inc = CSR()
|
||||
self._rdly_dq_rst = CSR()
|
||||
self._rdly_dq_inc = CSR()
|
||||
self._wdly_dq_rst = CSR()
|
||||
self._wdly_dq_inc = CSR()
|
||||
self._wdly_dqs_rst = CSR()
|
||||
self._wdly_dqs_inc = CSR()
|
||||
self._cdly_rst = CSR()
|
||||
self._cdly_inc = CSR()
|
||||
self._rdly_dq_rst = CSR()
|
||||
self._rdly_dq_inc = CSR()
|
||||
self._wdly_dq_rst = CSR()
|
||||
self._wdly_dq_inc = CSR()
|
||||
self._wdly_dqs_rst = CSR()
|
||||
self._wdly_dqs_inc = CSR()
|
||||
|
||||
cdly_rst = self._cdly_rst.re | self._rst.storage
|
||||
cdly_inc = self._cdly_inc.re
|
||||
|
||||
# Serialization ----------------------------------------------------------------------------
|
||||
# TODO: need to implement half-serialization from sys (16 bits) to sys2x (8 bits) before oserdese
|
||||
|
||||
# Clock
|
||||
clk_ser = Signal()
|
||||
clk_dly = Signal()
|
||||
self.oserdese2_ddr(din=self.ck_clk, dout=clk_ser, clk="sys8x")
|
||||
self.oserdese2_ddr(din=self.out.clk, dout=clk_ser, clk="sys8x")
|
||||
self.odelaye2(din=clk_ser, dout=clk_dly, rst=cdly_rst, inc=cdly_inc)
|
||||
self.obufds(din=clk_dly, dout=self.pads.clk_p, dout_b=self.pads.clk_n)
|
||||
|
||||
# probably no need for oserdese
|
||||
# FIXME: probably no need to serialize those
|
||||
for cmd in ["cke", "odt", "reset_n"]:
|
||||
cmd_ser = Signal()
|
||||
self.oserdese2_ddr(din=getattr(self, f"ck_{cmd}"), dout=cmd_ser, clk="sys8x")
|
||||
self.oserdese2_sdr(din=getattr(self.out, cmd), dout=cmd_ser, clk="sys8x")
|
||||
self.odelaye2(din=cmd_ser, dout=getattr(self.pads, cmd), rst=cdly_rst, inc=cdly_inc)
|
||||
|
||||
# Commands
|
||||
cs_ser = Signal()
|
||||
self.oserdese2_ddr(din=self.ck_cs, dout=cs_ser, clk="sys8x")
|
||||
self.oserdese2_sdr(din=self.out.cs, dout=cs_ser, clk="sys8x")
|
||||
self.odelaye2(din=cs_ser, dout=self.pads.cs, rst=cdly_rst, inc=cdly_inc)
|
||||
for i in range(6):
|
||||
for bit in range(6):
|
||||
ca_ser = Signal()
|
||||
self.oserdese2_ddr(din=self.ck_ca[i], dout=ca_ser, clk="sys8x")
|
||||
self.odelaye2(din=ca_ser, dout=self.pads.ca[i], rst=cdly_rst, inc=cdly_inc)
|
||||
self.oserdese2_sdr(din=self.out.ca[bit], dout=ca_ser, clk="sys8x")
|
||||
self.odelaye2(din=ca_ser, dout=self.pads.ca[bit], rst=cdly_rst, inc=cdly_inc)
|
||||
|
||||
# DQS
|
||||
for i in range(self.databits//8):
|
||||
for byte in range(self.databits//8):
|
||||
# DQS
|
||||
dqs_t = Signal()
|
||||
dqs_t = Signal()
|
||||
dqs_ser = Signal()
|
||||
dqs_dly = Signal()
|
||||
rst = (self._dly_sel.storage[i] & self._wdly_dqs_rst.re) | self._rst.storage
|
||||
inc = self._dly_sel.storage[i] & self._wdly_dqs_inc.re
|
||||
self.oserdese2_ddr(
|
||||
din=self.ck_dqs_o[i], dout=dqs_ser,
|
||||
tin=~self.dqs_oe, tout=dqs_t,
|
||||
clk="sys8x")
|
||||
self.odelaye2(din=dqs_ser, dout=dqs_dly, rst=rst, inc=inc)
|
||||
din = self.out.dqs_o[byte],
|
||||
dout_fb = dqs_ser,
|
||||
tin = ~self.out.dqs_oe,
|
||||
tout = dqs_t,
|
||||
clk = "sys8x", # TODO: if odelay is not available, need to use sys8x_90
|
||||
)
|
||||
self.odelaye2(
|
||||
din = dqs_ser,
|
||||
dout = dqs_dly,
|
||||
rst = self.get_rst(byte, self._wdly_dqs_rst),
|
||||
inc = self.get_inc(byte, self._wdly_dqs_inc),
|
||||
init = half_sys8x_taps, # shifts by 90 degrees
|
||||
)
|
||||
self.iobufds(
|
||||
din=dqs_dly, dout=Signal(),
|
||||
dinout=self.pads.dqs_p[i], dinout_b=self.pads.dqs_n[i],
|
||||
tin=dqs_t)
|
||||
din = dqs_dly,
|
||||
dout = Signal(), # TODO: DQS input path
|
||||
tin = dqs_t,
|
||||
dinout = self.pads.dqs_p[byte],
|
||||
dinout_b = self.pads.dqs_n[byte],
|
||||
)
|
||||
|
||||
# DMI
|
||||
for i in range(self.databits//8):
|
||||
dmi_t = Signal()
|
||||
for byte in range(self.databits//8):
|
||||
dmi_t = Signal()
|
||||
dmi_ser = Signal()
|
||||
dmi_dly = Signal()
|
||||
rst = (self._dly_sel.storage[i] & self._wdly_dq_rst.re) | self._rst.storage
|
||||
inc = self._dly_sel.storage[i] & self._wdly_dq_inc.re
|
||||
self.oserdese2_ddr(
|
||||
din=self.ck_dmi_o[i], dout=dmi_ser,
|
||||
tin=~self.dmi_oe, tout=dmi_t,
|
||||
clk="sys8x")
|
||||
self.odelaye2(din=dmi_ser, dout=dmi_dly, rst=rst, inc=inc)
|
||||
self.iobuf(din=dmi_dly, dout=Signal(), dinout=self.pads.dmi[i], tin=dmi_t)
|
||||
din = self.out.dmi_o[byte],
|
||||
dout_fb = dmi_ser,
|
||||
tin = ~self.out.dmi_oe,
|
||||
tout = dmi_t,
|
||||
clk = "sys8x",
|
||||
)
|
||||
self.odelaye2(
|
||||
din = dmi_ser,
|
||||
dout = dmi_dly,
|
||||
rst = self.get_rst(byte, self._wdly_dq_rst),
|
||||
inc = self.get_inc(byte, self._wdly_dq_inc),
|
||||
)
|
||||
self.iobuf(
|
||||
din = dmi_dly,
|
||||
dout = Signal(),
|
||||
tin = dmi_t,
|
||||
dinout = self.pads.dmi[byte],
|
||||
)
|
||||
|
||||
# DQ
|
||||
for i in range(self.databits):
|
||||
dq_t = Signal()
|
||||
dq_ser = Signal()
|
||||
dq_dly = Signal()
|
||||
dq_i = Signal()
|
||||
for bit in range(self.databits):
|
||||
dq_t = Signal()
|
||||
dq_ser = Signal()
|
||||
dq_dly = Signal()
|
||||
dq_i = Signal()
|
||||
dq_i_dly = Signal()
|
||||
|
||||
rst_w = (self._dly_sel.storage[i//8] & self._wdly_dq_rst.re) | self._rst.storage
|
||||
inc_w = self._dly_sel.storage[i//8] & self._wdly_dq_inc.re
|
||||
rst_r = (self._dly_sel.storage[i//8] & self._rdly_dq_rst.re) | self._rst.storage
|
||||
inc_r = self._dly_sel.storage[i//8] & self._rdly_dq_inc.re
|
||||
|
||||
self.oserdese2_ddr(
|
||||
din=self.ck_dq_o[i], dout=dq_ser,
|
||||
tin=~self.dq_oe, tout=dq_t,
|
||||
clk="sys8x")
|
||||
self.odelaye2(din=dq_ser, dout=dq_dly, rst=rst_w, inc=inc_w)
|
||||
self.iobuf(din=dq_dly, dout=dq_i, dinout=self.pads.dq[i], tin=dq_t)
|
||||
self.idelaye2(din=dq_i, dout=dq_i_dly, rst=rst_r, inc=inc_r)
|
||||
self.iserdese2_ddr(din=dq_i_dly, dout=self.ck_dq_i[i], clk="sys8x")
|
||||
din = self.out.dq_o[bit],
|
||||
dout_fb = dq_ser, # TODO: compare: S7DDRPHY uses OQ not OFB
|
||||
tin = ~self.out.dq_oe,
|
||||
tout = dq_t,
|
||||
clk = "sys8x",
|
||||
)
|
||||
self.odelaye2(
|
||||
din = dq_ser,
|
||||
dout = dq_dly,
|
||||
rst = self.get_rst(bit//8, self._wdly_dq_rst),
|
||||
inc = self.get_inc(bit//8, self._wdly_dq_inc),
|
||||
)
|
||||
self.iobuf(
|
||||
din = dq_dly,
|
||||
dout = dq_i,
|
||||
dinout = self.pads.dq[bit],
|
||||
tin = dq_t
|
||||
)
|
||||
self.idelaye2(
|
||||
din = dq_i,
|
||||
dout = dq_i_dly,
|
||||
rst = self.get_rst(bit//8, self._rdly_dq_rst),
|
||||
inc = self.get_inc(bit//8, self._rdly_dq_inc)
|
||||
)
|
||||
self.iserdese2_ddr(
|
||||
din = dq_i_dly,
|
||||
dout = self.out.dq_i[bit],
|
||||
clk = "sys8x"
|
||||
)
|
||||
|
||||
def delayed_sys2x(self, sig, **kwargs):
|
||||
return delayed(self.sys2x_delay, sig, **kwargs)
|
||||
|
||||
def idelaye2(self, *, din, dout, init=0, rst=None, inc=None):
|
||||
assert not ((rst is None) ^ (inc is None))
|
||||
|
@ -154,9 +197,10 @@ class S7LPDDR4PHY(LPDDR4PHY):
|
|||
|
||||
self.specials += Instance("IDELAYE2", **params)
|
||||
|
||||
def odelaye2(self, *, din, dout, init=0, rst=None, inc=None): # Not available for Artix7
|
||||
def odelaye2(self, *, din, dout, clk=None, init=0, rst=None, inc=None): # Not available for Artix7
|
||||
assert not ((rst is None) ^ (inc is None))
|
||||
fixed = rst is not None
|
||||
assert clk is not None or fixed
|
||||
|
||||
params = dict(
|
||||
p_SIGNAL_PATTERN = "DATA",
|
||||
|
@ -174,7 +218,7 @@ class S7LPDDR4PHY(LPDDR4PHY):
|
|||
if not fixed:
|
||||
params.update(dict(
|
||||
p_ODELAY_TYPE = "VARIABLE",
|
||||
i_C = ClockSignal(),
|
||||
i_C = ClockSignal(clk),
|
||||
i_LD = rst,
|
||||
i_CE = inc,
|
||||
i_LDPIPEEN = 0,
|
||||
|
@ -183,66 +227,69 @@ class S7LPDDR4PHY(LPDDR4PHY):
|
|||
|
||||
self.specials += Instance("ODELAYE2", **params)
|
||||
|
||||
def oserdese2_ddr(self, *, din, dout, clk, tin=None, tout=None):
|
||||
# FIXME: must implement 1 step of serialization manually (16bit -> 8bit)
|
||||
# assert self.nphases == 4
|
||||
nphases = 4
|
||||
assert not ((tin is None) ^ (tout is None))
|
||||
def oserdese2_ddr(self, *, din, clk, dout=None, dout_fb=None, tin=None, tout=None):
|
||||
data_width = len(din)
|
||||
assert data_width == 8, (data_width, din)
|
||||
assert not ((tin is None) ^ (tout is None)), "When using tristate specify both `tin` and `tout`"
|
||||
assert not ((dout is None) and (dout_fb is None)), "Output to OQ (-> IOB) and/or to OFB (-> ISERDESE2/ODELAYE2)"
|
||||
|
||||
dout = Signal() if dout is None else dout
|
||||
dout_fb = Signal() if dout_fb is None else dout_fb
|
||||
|
||||
params = dict(
|
||||
p_SERDES_MODE = "MASTER",
|
||||
p_DATA_WIDTH = 2*nphases,
|
||||
p_DATA_WIDTH = data_width,
|
||||
p_TRISTATE_WIDTH = 1,
|
||||
p_DATA_RATE_OQ = "DDR",
|
||||
p_DATA_RATE_TQ = "BUF",
|
||||
i_RST = ResetSignal(),
|
||||
i_RST = ResetSignal() | self._rst.storage,
|
||||
i_CLK = ClockSignal(clk),
|
||||
i_CLKDIV = ClockSignal("sys"),
|
||||
i_CLKDIV = ClockSignal("sys2x"),
|
||||
o_OQ = dout,
|
||||
o_OFB = dout_fb,
|
||||
i_OCE = 1,
|
||||
)
|
||||
|
||||
for i in range(2*nphases):
|
||||
params["i_D{}".format(i+1)] = din[i]
|
||||
for i in range(data_width):
|
||||
params[f"i_D{i+1}"] = din[i]
|
||||
|
||||
if tin is not None:
|
||||
# with DATA_RATE_TQ=BUF tristate is asynchronous, so we need to delay it
|
||||
tin_d = Signal()
|
||||
self.sync += tin_d.eq(tin)
|
||||
|
||||
# register it on the CLKDIV (as it would be too short for 180 deg shifted clk)
|
||||
tin_cdc = Signal()
|
||||
sd_clkdiv = getattr(self.sync, clk)
|
||||
sd_clkdiv += tin_cdc.eq(tin_d)
|
||||
|
||||
params.update(dict(i_TCE=1, i_T1=tin_cdc, o_TQ=tout))
|
||||
params.update(dict(i_TCE=1, i_T1=self.delayed_sys2x(tin), o_TQ=tout))
|
||||
|
||||
self.specials += Instance("OSERDESE2", **params)
|
||||
|
||||
def oserdese2_sdr(self, **kwargs):
|
||||
# Use 8:1 OSERDESE2 DDR instead of 4:1 OSERDESE2 SDR to have the same latency
|
||||
din = kwargs["din"]
|
||||
din_ddr = Signal(2*len(din))
|
||||
kwargs["din"] = din_ddr
|
||||
self.comb += din_ddr.eq(Cat(*[Replicate(bit, 2) for bit in din]))
|
||||
self.oserdese2_ddr(**kwargs)
|
||||
|
||||
def iserdese2_ddr(self, *, din, dout, clk):
|
||||
# FIXME: must implement 1 step of deserialization manually (8bit -> 16bit)
|
||||
# assert self.nphases == 4
|
||||
nphases = 4
|
||||
data_width = len(dout)
|
||||
assert data_width == 8, (data_width, dout)
|
||||
|
||||
params = dict(
|
||||
p_SERDES_MODE = "MASTER",
|
||||
p_INTERFACE_TYPE = "NETWORKING", # TODO: try using MEMORY mode?
|
||||
p_DATA_WIDTH = 2*nphases,
|
||||
p_DATA_WIDTH = data_width,
|
||||
p_DATA_RATE = "DDR",
|
||||
p_NUM_CE = 1,
|
||||
p_IOBDELAY = "IFD",
|
||||
i_RST = ResetSignal(),
|
||||
i_RST = ResetSignal() | self._rst.storage,
|
||||
i_CLK = ClockSignal(clk),
|
||||
i_CLKB = ~ClockSignal(clk),
|
||||
i_CLKDIV = ClockSignal("sys"),
|
||||
i_CLKDIV = ClockSignal("sys2x"),
|
||||
i_BITSLIP = 0,
|
||||
i_CE1 = 1,
|
||||
i_DDLY = din,
|
||||
)
|
||||
|
||||
for i in range(2*nphases):
|
||||
for i in range(data_width):
|
||||
# invert order
|
||||
params["o_Q{}".format(i+1)] = dout[(2*nphases - 1) - i]
|
||||
params[f"o_Q{i+1}"] = dout[(data_width - 1) - i]
|
||||
|
||||
self.specials += Instance("ISERDESE2", **params)
|
||||
|
||||
|
|
Loading…
Reference in New Issue