lpddr4: add support for MASKED-WRITE

This commit is contained in:
Jędrzej Boczar 2021-01-27 11:27:38 +01:00
parent 4b78fc99e8
commit 183f1643aa
7 changed files with 122 additions and 87 deletions

View File

@ -974,10 +974,13 @@ class MT53E256M16D1(SDRAMModule):
nrows = 32768 nrows = 32768
ncols = 1024 ncols = 1024
# TODO: find a way to select if we need masked writes
tccd = {"write": (8, None), "masked-write": (32, None)}
# TODO: tZQCS - performing ZQC during runtime will require modifying Refresher, as ZQC has to be done in 2 phases # TODO: tZQCS - performing ZQC during runtime will require modifying Refresher, as ZQC has to be done in 2 phases
# 1. ZQCAL START is issued 2. ZQCAL LATCH updates the values, the time START->LATCH tZQCAL=1us, so we cannot block # 1. ZQCAL START is issued 2. ZQCAL LATCH updates the values, the time START->LATCH tZQCAL=1us, so we cannot block
# the controller during this time, after ZQCAL LATCH we have to wait tZQLAT=max(8ck, 30ns) # the controller during this time, after ZQCAL LATCH we have to wait tZQLAT=max(8ck, 30ns)
technology_timings = _TechnologyTimings(tREFI=32e6/8192, tWTR=(8, 10), tCCD=(8, None), tRRD=(4, 10), tZQCS=None) technology_timings = _TechnologyTimings(tREFI=32e6/8192, tWTR=(8, 10), tCCD=tccd["masked-write"], tRRD=(4, 10), tZQCS=None)
speedgrade_timings = { speedgrade_timings = {
"1866": _SpeedgradeTimings(tRP=(3, 21), tRCD=(4, 18), tWR=(4, 18), tRFC=180, tFAW=40, tRAS=(3, 42)), # TODO: tRAS_max "1866": _SpeedgradeTimings(tRP=(3, 21), tRCD=(4, 18), tWR=(4, 18), tRFC=180, tFAW=40, tRAS=(3, 42)), # TODO: tRAS_max
} }

View File

@ -15,7 +15,8 @@ from litedram.phy.lpddr4.commands import DFIPhaseAdapter
class LPDDR4PHY(Module, AutoCSR): class LPDDR4PHY(Module, AutoCSR):
def __init__(self, pads, *, def __init__(self, pads, *,
sys_clk_freq, write_ser_latency, read_des_latency, phytype, cmd_delay=None): sys_clk_freq, write_ser_latency, read_des_latency, phytype,
masked_write=True, cmd_delay=None):
self.pads = pads self.pads = pads
self.memtype = memtype = "LPDDR4" self.memtype = memtype = "LPDDR4"
self.nranks = nranks = 1 if not hasattr(pads, "cs_n") else len(pads.cs_n) self.nranks = nranks = 1 if not hasattr(pads, "cs_n") else len(pads.cs_n)
@ -132,7 +133,7 @@ class LPDDR4PHY(Module, AutoCSR):
# # # # # #
adapters = [DFIPhaseAdapter(phase) for phase in self.dfi.phases] adapters = [DFIPhaseAdapter(phase, masked_write=masked_write) for phase in self.dfi.phases]
self.submodules += adapters self.submodules += adapters
# Now prepare the data by converting the sequences on adapters into sequences on the pads. # Now prepare the data by converting the sequences on adapters into sequences on the pads.
@ -258,8 +259,8 @@ class LPDDR4PHY(Module, AutoCSR):
self.submodules += BitSlip( self.submodules += BitSlip(
dw = 2*nphases, dw = 2*nphases,
cycles = bitslip_cycles, cycles = bitslip_cycles,
rst = (self._dly_sel.storage[bit//8] & self._wdly_dq_bitslip_rst.re) | self._rst.storage, rst = (self._dly_sel.storage[bit] & self._wdly_dq_bitslip_rst.re) | self._rst.storage,
slp = self._dly_sel.storage[bit//8] & self._wdly_dq_bitslip.re, slp = self._dly_sel.storage[bit] & self._wdly_dq_bitslip.re,
i = dqs_pattern.o, i = dqs_pattern.o,
o = self.ck_dqs_o[bit], o = self.ck_dqs_o[bit],
) )
@ -268,10 +269,21 @@ class LPDDR4PHY(Module, AutoCSR):
# DMI signal is used for Data Mask or Data Bus Inversion depending on Mode Registers values. # DMI signal is used for Data Mask or Data Bus Inversion depending on Mode Registers values.
# With DM and DBI disabled, this signal is a Don't Care. # With DM and DBI disabled, this signal is a Don't Care.
# With DM enabled, masking is performed only when the command used is WRITE-MASKED. # With DM enabled, masking is performed only when the command used is WRITE-MASKED.
# TODO: use WRITE-MASKED for all write commands, and configure Mode Registers for that # We don't support DBI, DM support is configured statically with `masked_write`.
# during DRAM initialization (we don't want to support DBI). for bit in range(self.databits//8):
for bin in range(self.databits//8): if not masked_write:
self.comb += self.ck_dmi_o[bit].eq(0) self.comb += self.ck_dmi_o[bit].eq(0)
self.comb += self.dmi_oe.eq(0)
else:
self.comb += self.dmi_oe.eq(self.dq_oe)
self.submodules += BitSlip(
dw = 2*nphases,
cycles = bitslip_cycles,
rst = (self._dly_sel.storage[bit] & self._wdly_dq_bitslip_rst.re) | self._rst.storage,
slp = self._dly_sel.storage[bit] & self._wdly_dq_bitslip.re,
i = Cat(*[self.dfi.phases[i//2] .wrdata_mask[i%2 * self.databits//8 + bit] for i in range(2*nphases)]),
o = self.ck_dmi_o[bit],
)
# Read Control Path ------------------------------------------------------------------------ # Read Control Path ------------------------------------------------------------------------
# Creates a delay line of read commands coming from the DFI interface. The output is used to # Creates a delay line of read commands coming from the DFI interface. The output is used to

View File

@ -30,7 +30,7 @@ class DFIPhaseAdapter(Module):
are then counted starting from CS low on the 4th cycle. are then counted starting from CS low on the 4th cycle.
""" """
def __init__(self, dfi_phase): def __init__(self, dfi_phase, masked_write=True):
# CS/CA values for 4 SDR cycles # CS/CA values for 4 SDR cycles
self.cs = Signal(4) self.cs = Signal(4)
self.ca = Array([Signal(6) for _ in range(4)]) self.ca = Array([Signal(6) for _ in range(4)])
@ -65,11 +65,12 @@ class DFIPhaseAdapter(Module):
def cmds(cmd1, cmd2, valid=1): def cmds(cmd1, cmd2, valid=1):
return self.cmd1.set(cmd1) + self.cmd2.set(cmd2) + [self.valid.eq(valid)] return self.cmd1.set(cmd1) + self.cmd2.set(cmd2) + [self.valid.eq(valid)]
write1 = "MASK WRITE-1" if masked_write else "WRITE-1"
self.comb += If(dfi_phase.cs_n == 0, # require dfi.cs_n self.comb += If(dfi_phase.cs_n == 0, # require dfi.cs_n
Case(dfi_cmd, { Case(dfi_cmd, {
_cmd["ACT"]: cmds("ACTIVATE-1", "ACTIVATE-2"), _cmd["ACT"]: cmds("ACTIVATE-1", "ACTIVATE-2"),
_cmd["RD"]: cmds("READ-1", "CAS-2"), _cmd["RD"]: cmds("READ-1", "CAS-2"),
_cmd["WR"]: cmds("WRITE-1", "CAS-2"), # TODO: masked write _cmd["WR"]: cmds(write1, "CAS-2"),
_cmd["PRE"]: cmds("DESELECT", "PRECHARGE"), _cmd["PRE"]: cmds("DESELECT", "PRECHARGE"),
_cmd["REF"]: cmds("DESELECT", "REFRESH"), _cmd["REF"]: cmds("DESELECT", "REFRESH"),
_cmd["ZQC"]: cmds("DESELECT", "MPC"), _cmd["ZQC"]: cmds("DESELECT", "MPC"),

View File

@ -518,8 +518,8 @@ class DQWrite(DQBurst):
NextValue(masked, self.masked), NextValue(masked, self.masked),
], ],
ops = [ ops = [
self.log.debug("WRITE[%d]: bank=%d, row=%d, col=%d, data=0x%04x", self.log.debug("WRITE[%d]: bank=%d, row=%d, col=%d, dq=0x%04x dm=0x%02b",
self.burst_counter, bank, row, self.col_burst, dq, once=False), self.burst_counter, bank, row, self.col_burst, dq, dmi, once=False),
If(masked, If(masked,
ports[bank].we.eq(~dmi), # DMI high masks the beat ports[bank].we.eq(~dmi), # DMI high masks the beat
).Else( ).Else(
@ -534,7 +534,7 @@ class DQRead(DQBurst):
def __init__(self, *, dq, ports, nrows, ncols, bank, row, col, **kwargs): def __init__(self, *, dq, ports, nrows, ncols, bank, row, col, **kwargs):
super().__init__(nrows=nrows, ncols=ncols, row=row, col=col, **kwargs) super().__init__(nrows=nrows, ncols=ncols, row=row, col=col, **kwargs)
self.add_fsm([ self.add_fsm([
self.log.debug("READ[%d]: bank=%d, row=%d, col=%d, data=0x%04x", self.log.debug("READ[%d]: bank=%d, row=%d, col=%d, dq=0x%04x",
self.burst_counter, bank, row, self.col_burst, dq, once=False), self.burst_counter, bank, row, self.col_burst, dq, once=False),
ports[bank].we.eq(0), ports[bank].we.eq(0),
ports[bank].adr.eq(self.addr), ports[bank].adr.eq(self.addr),

View File

@ -36,14 +36,15 @@ class LPDDR4SimulationPads(Module):
class LPDDR4SimPHY(LPDDR4PHY): class LPDDR4SimPHY(LPDDR4PHY):
def __init__(self, sys_clk_freq=100e6, aligned_reset_zero=False): def __init__(self, sys_clk_freq=100e6, aligned_reset_zero=False, **kwargs):
pads = LPDDR4SimulationPads() pads = LPDDR4SimulationPads()
self.submodules += pads self.submodules += pads
super().__init__(pads, super().__init__(pads,
sys_clk_freq = sys_clk_freq, sys_clk_freq = sys_clk_freq,
write_ser_latency = Serializer.LATENCY, write_ser_latency = Serializer.LATENCY,
read_des_latency = Deserializer.LATENCY, read_des_latency = Deserializer.LATENCY,
phytype = "LPDDR4SimPHY") phytype = "LPDDR4SimPHY",
**kwargs)
def add_reset_value(phase, kwargs): def add_reset_value(phase, kwargs):
if aligned_reset_zero and phase == 0: if aligned_reset_zero and phase == 0:

View File

@ -64,7 +64,6 @@ class Clocks(dict): # FORMAT: {name: {"freq_hz": _, "phase_deg": _}, ...}
def add_io(self, io): def add_io(self, io):
for name in self.names(): for name in self.names():
print((name + "_clk", 0, Pins(1)))
io.append((name + "_clk", 0, Pins(1))) io.append((name + "_clk", 0, Pins(1)))
def add_clockers(self, sim_config): def add_clockers(self, sim_config):
@ -105,7 +104,7 @@ def get_clocks(sys_clk_freq):
class SimSoC(SoCCore): class SimSoC(SoCCore):
def __init__(self, clocks, log_level, auto_precharge=False, with_refresh=True, trace_reset=0, def __init__(self, clocks, log_level, auto_precharge=False, with_refresh=True, trace_reset=0,
disable_delay=False, **kwargs): disable_delay=False, masked_write=True, **kwargs):
platform = Platform() platform = Platform()
sys_clk_freq = clocks["sys"]["freq_hz"] sys_clk_freq = clocks["sys"]["freq_hz"]
@ -126,7 +125,11 @@ class SimSoC(SoCCore):
# LPDDR4 ----------------------------------------------------------------------------------- # LPDDR4 -----------------------------------------------------------------------------------
sdram_module = litedram_modules.MT53E256M16D1(sys_clk_freq, "1:8") sdram_module = litedram_modules.MT53E256M16D1(sys_clk_freq, "1:8")
pads = platform.request("lpddr4") pads = platform.request("lpddr4")
self.submodules.ddrphy = LPDDR4SimPHY(sys_clk_freq=sys_clk_freq, aligned_reset_zero=True) self.submodules.ddrphy = LPDDR4SimPHY(
sys_clk_freq = sys_clk_freq,
aligned_reset_zero = True,
masked_write = masked_write,
)
# fake delays (make no sense in simulation, but sdram.c expects them) # fake delays (make no sense in simulation, but sdram.c expects them)
self.ddrphy._rdly_dq_rst = CSR() self.ddrphy._rdly_dq_rst = CSR()
self.ddrphy._rdly_dq_inc = CSR() self.ddrphy._rdly_dq_inc = CSR()
@ -443,6 +446,11 @@ def generate_gtkw_savefile(builder, vns, trace_fst):
filter = regex_filter(suffixes2re(["wrdata"])), filter = regex_filter(suffixes2re(["wrdata"])),
sorter = dfi_sorter(), sorter = dfi_sorter(),
colorer = dfi_per_phase_colorer()) colorer = dfi_per_phase_colorer())
gtkw.add(soc.ddrphy.dfi,
group_name = "dfi wrdata_mask",
filter = regex_filter(suffixes2re(["wrdata_mask"])),
sorter = dfi_sorter(),
colorer = dfi_per_phase_colorer())
gtkw.add(soc.ddrphy.dfi, gtkw.add(soc.ddrphy.dfi,
group_name = "dfi rddata", group_name = "dfi rddata",
filter = regex_filter(suffixes2re(["rddata"])), filter = regex_filter(suffixes2re(["rddata"])),
@ -475,6 +483,8 @@ def main():
parser.add_argument("--log-level", default="all=INFO", help="Set simulation logging level") parser.add_argument("--log-level", default="all=INFO", help="Set simulation logging level")
parser.add_argument("--disable-delay", action="store_true", help="Disable CPU delays") parser.add_argument("--disable-delay", action="store_true", help="Disable CPU delays")
parser.add_argument("--gtkw-savefile", action="store_true", help="Generate GTKWave savefile") parser.add_argument("--gtkw-savefile", action="store_true", help="Generate GTKWave savefile")
parser.add_argument("--no-masked-write", action="store_true", help="Use LPDDR4 WRITE instead of MASKED-WRITE")
parser.add_argument("--no-run", action="store_true", help="Don't run the simulation, just generate files")
args = parser.parse_args() args = parser.parse_args()
soc_kwargs = soc_sdram_argdict(args) soc_kwargs = soc_sdram_argdict(args)
@ -503,6 +513,7 @@ def main():
trace_reset = int(args.trace_reset), trace_reset = int(args.trace_reset),
log_level = args.log_level, log_level = args.log_level,
disable_delay = args.disable_delay, disable_delay = args.disable_delay,
masked_write = not args.no_masked_write,
**soc_kwargs) **soc_kwargs)
# Build/Run ------------------------------------------------------------------------------------ # Build/Run ------------------------------------------------------------------------------------
@ -520,7 +531,8 @@ def main():
if args.gtkw_savefile: if args.gtkw_savefile:
generate_gtkw_savefile(builder, vns, trace_fst=args.trace_fst) generate_gtkw_savefile(builder, vns, trace_fst=args.trace_fst)
builder.build(build=False, **build_kwargs) if not args.no_run:
builder.build(build=False, **build_kwargs)
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@ -508,25 +508,28 @@ class TestLPDDR4(unittest.TestCase):
mrw = dict(cs_n=0, cas_n=0, ras_n=0, we_n=0, bank=0, address=(0b110011 << 8) | 0b10101010) # 6-bit address | 8-bit op code mrw = dict(cs_n=0, cas_n=0, ras_n=0, we_n=0, bank=0, address=(0b110011 << 8) | 0b10101010) # 6-bit address | 8-bit op code
zqc_start = dict(cs_n=0, cas_n=1, ras_n=1, we_n=0, bank=0, address=0b1001111) # MPC with ZQCAL START operand zqc_start = dict(cs_n=0, cas_n=1, ras_n=1, we_n=0, bank=0, address=0b1001111) # MPC with ZQCAL START operand
zqc_latch = dict(cs_n=0, cas_n=1, ras_n=1, we_n=0, bank=0, address=0b1010001) # MPC with ZQCAL LATCH operand zqc_latch = dict(cs_n=0, cas_n=1, ras_n=1, we_n=0, bank=0, address=0b1010001) # MPC with ZQCAL LATCH operand
self.run_test(LPDDR4SimPHY(), for masked_write in [True, False]:
dfi_sequence = [ with self.subTest(masked_write=masked_write):
{0: read, 4: write_ap}, wr_ca3 = '{}x00'.format('0' if not masked_write else '1')
{0: activate, 4: refresh_ab}, self.run_test(LPDDR4SimPHY(masked_write=masked_write),
{0: precharge, 4: mrw}, dfi_sequence = [
{0: zqc_start, 4: zqc_latch}, {0: read, 4: write_ap},
], {0: activate, 4: refresh_ab},
pad_checkers = {"sys8x_90": { {0: precharge, 4: mrw},
# note that refresh and precharge have a single command so these go as cmd2 {0: zqc_start, 4: zqc_latch},
# rd wr act ref pre mrw zqcs zqcl ],
'cs': latency + '1010'+'1010' + '1010'+'0010' + '0010'+'1010' + '0010'+'0010', pad_checkers = {"sys8x_90": {
'ca0': latency + '0100'+'0100' + '1011'+'0000' + '0001'+'0100' + '0001'+'0001', # note that refresh and precharge have a single command so these go as cmd2
'ca1': latency + '1010'+'0110' + '0110'+'0000' + '0001'+'1111' + '0001'+'0000', # rd wr act ref pre mrw zqcs zqcl
'ca2': latency + '0101'+'1100' + '0010'+'0001' + '0000'+'1010' + '0001'+'0000', 'cs': latency + '1010'+'1010' + '1010'+'0010' + '0010'+'1010' + '0010'+'0010',
'ca3': latency + '0x01'+'0x00' + '1110'+'001x' + '000x'+'0001' + '0001'+'0000', 'ca0': latency + '0100'+'0100' + '1011'+'0000' + '0001'+'0100' + '0001'+'0001',
'ca4': latency + '0110'+'0010' + '1010'+'000x' + '001x'+'0110' + '0000'+'0001', 'ca1': latency + '1010'+'0110' + '0110'+'0000' + '0001'+'1111' + '0001'+'0000',
'ca5': latency + '0010'+'0100' + '1001'+'001x' + '000x'+'1101' + '0010'+'0010', 'ca2': latency + '0101'+'1100' + '0010'+'0001' + '0000'+'1010' + '0001'+'0000',
}}, 'ca3': latency + '0x01'+wr_ca3 + '1110'+'001x' + '000x'+'0001' + '0001'+'0000',
) 'ca4': latency + '0110'+'0010' + '1010'+'000x' + '001x'+'0110' + '0000'+'0001',
'ca5': latency + '0010'+'0100' + '1001'+'001x' + '000x'+'1101' + '0010'+'0010',
}},
)
def test_lpddr4_command_pads(self): def test_lpddr4_command_pads(self):
# Test serialization of DFI command pins (cs/cke/odt/reset_n) # Test serialization of DFI command pins (cs/cke/odt/reset_n)
@ -720,53 +723,56 @@ class TestLPDDR4(unittest.TestCase):
def test_lpddr4_cmd_write(self): def test_lpddr4_cmd_write(self):
# Test whole WRITE command sequence verifying data on pads and write_latency from MC perspective # Test whole WRITE command sequence verifying data on pads and write_latency from MC perspective
phy = LPDDR4SimPHY() for masked_write in [True, False]:
zero = '00000000' * 2 with self.subTest(masked_write=masked_write):
write_latency = phy.settings.write_latency phy = LPDDR4SimPHY(masked_write=masked_write)
wrphase = phy.settings.wrphase.reset.value zero = '00000000' * 2
write_latency = phy.settings.write_latency
wrphase = phy.settings.wrphase.reset.value
dfi_data = { dfi_data = {
0: dict(wrdata=0x11112222), 0: dict(wrdata=0x11112222),
1: dict(wrdata=0x33334444), 1: dict(wrdata=0x33334444),
2: dict(wrdata=0x55556666), 2: dict(wrdata=0x55556666),
3: dict(wrdata=0x77778888), 3: dict(wrdata=0x77778888),
4: dict(wrdata=0x9999aaaa), 4: dict(wrdata=0x9999aaaa),
5: dict(wrdata=0xbbbbcccc), 5: dict(wrdata=0xbbbbcccc),
6: dict(wrdata=0xddddeeee), 6: dict(wrdata=0xddddeeee),
7: dict(wrdata=0xffff0000), 7: dict(wrdata=0xffff0000),
} }
dfi_sequence = [ dfi_sequence = [
{wrphase: dict(cs_n=0, cas_n=0, ras_n=1, we_n=0, wrdata_en=1)}, {wrphase: dict(cs_n=0, cas_n=0, ras_n=1, we_n=0, wrdata_en=1)},
*[{} for _ in range(write_latency - 1)], *[{} for _ in range(write_latency - 1)],
dfi_data, dfi_data,
{}, {},
{}, {},
{}, {},
{}, {},
{}, {},
] ]
self.run_test(phy, wr_ca3 = "0000{}000".format('0' if not masked_write else '1')
dfi_sequence = dfi_sequence, self.run_test(phy,
pad_checkers = { dfi_sequence = dfi_sequence,
"sys8x_90": { pad_checkers = {
"cs": "00000000"*2 + "00001010" + "00000000"*2, "sys8x_90": {
"ca0": "00000000"*2 + "00000000" + "00000000"*2, "cs": "00000000"*2 + "00001010" + "00000000"*2,
"ca1": "00000000"*2 + "00000010" + "00000000"*2, "ca0": "00000000"*2 + "00000000" + "00000000"*2,
"ca2": "00000000"*2 + "00001000" + "00000000"*2, "ca1": "00000000"*2 + "00000010" + "00000000"*2,
"ca3": "00000000"*2 + "00000000" + "00000000"*2, "ca2": "00000000"*2 + "00001000" + "00000000"*2,
"ca4": "00000000"*2 + "00000010" + "00000000"*2, "ca3": "00000000"*2 + wr_ca3 + "00000000"*2,
"ca5": "00000000"*2 + "00000000" + "00000000"*2, "ca4": "00000000"*2 + "00000010" + "00000000"*2,
}, "ca5": "00000000"*2 + "00000000" + "00000000"*2,
"sys8x_90_ddr": { },
f'dq{i}': (self.CMD_LATENCY+1)*zero + zero + dq_pattern(i, dfi_data, "wrdata") + zero "sys8x_90_ddr": {
for i in range(16) f'dq{i}': (self.CMD_LATENCY+1)*zero + zero + dq_pattern(i, dfi_data, "wrdata") + zero
}, for i in range(16)
"sys8x_ddr": { },
"dqs0": (self.CMD_LATENCY+1)*zero + '01010101'+'01010100' + '01010101'+'01010101' + '00010101'+'01010101' + zero, "sys8x_ddr": {
}, "dqs0": (self.CMD_LATENCY+1)*zero + '01010101'+'01010100' + '01010101'+'01010101' + '00010101'+'01010101' + zero,
}, },
) },
)
def test_lpddr4_cmd_read(self): def test_lpddr4_cmd_read(self):
# Test whole READ command sequence simulating DRAM response and verifying read_latency from MC perspective # Test whole READ command sequence simulating DRAM response and verifying read_latency from MC perspective