From 183f1643aac354d9b683d56f76aa0ec3880e4c3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C4=99drzej=20Boczar?= Date: Wed, 27 Jan 2021 11:27:38 +0100 Subject: [PATCH] lpddr4: add support for MASKED-WRITE --- litedram/modules.py | 5 +- litedram/phy/lpddr4/basephy.py | 28 +++++-- litedram/phy/lpddr4/commands.py | 5 +- litedram/phy/lpddr4/sim.py | 6 +- litedram/phy/lpddr4/simphy.py | 11 +-- litedram/phy/lpddr4/simsoc.py | 20 ++++- test/test_lpddr4.py | 134 +++++++++++++++++--------------- 7 files changed, 122 insertions(+), 87 deletions(-) diff --git a/litedram/modules.py b/litedram/modules.py index ac9770a..d323106 100644 --- a/litedram/modules.py +++ b/litedram/modules.py @@ -974,10 +974,13 @@ class MT53E256M16D1(SDRAMModule): nrows = 32768 ncols = 1024 + # TODO: find a way to select if we need masked writes + tccd = {"write": (8, None), "masked-write": (32, None)} + # TODO: tZQCS - performing ZQC during runtime will require modifying Refresher, as ZQC has to be done in 2 phases # 1. ZQCAL START is issued 2. ZQCAL LATCH updates the values, the time START->LATCH tZQCAL=1us, so we cannot block # the controller during this time, after ZQCAL LATCH we have to wait tZQLAT=max(8ck, 30ns) - technology_timings = _TechnologyTimings(tREFI=32e6/8192, tWTR=(8, 10), tCCD=(8, None), tRRD=(4, 10), tZQCS=None) + technology_timings = _TechnologyTimings(tREFI=32e6/8192, tWTR=(8, 10), tCCD=tccd["masked-write"], tRRD=(4, 10), tZQCS=None) speedgrade_timings = { "1866": _SpeedgradeTimings(tRP=(3, 21), tRCD=(4, 18), tWR=(4, 18), tRFC=180, tFAW=40, tRAS=(3, 42)), # TODO: tRAS_max } diff --git a/litedram/phy/lpddr4/basephy.py b/litedram/phy/lpddr4/basephy.py index 36e4ccc..76e7f9e 100644 --- a/litedram/phy/lpddr4/basephy.py +++ b/litedram/phy/lpddr4/basephy.py @@ -15,7 +15,8 @@ from litedram.phy.lpddr4.commands import DFIPhaseAdapter class LPDDR4PHY(Module, AutoCSR): def __init__(self, pads, *, - sys_clk_freq, write_ser_latency, read_des_latency, phytype, cmd_delay=None): + sys_clk_freq, write_ser_latency, read_des_latency, phytype, + masked_write=True, cmd_delay=None): self.pads = pads self.memtype = memtype = "LPDDR4" self.nranks = nranks = 1 if not hasattr(pads, "cs_n") else len(pads.cs_n) @@ -132,7 +133,7 @@ class LPDDR4PHY(Module, AutoCSR): # # # - adapters = [DFIPhaseAdapter(phase) for phase in self.dfi.phases] + adapters = [DFIPhaseAdapter(phase, masked_write=masked_write) for phase in self.dfi.phases] self.submodules += adapters # Now prepare the data by converting the sequences on adapters into sequences on the pads. @@ -258,8 +259,8 @@ class LPDDR4PHY(Module, AutoCSR): self.submodules += BitSlip( dw = 2*nphases, cycles = bitslip_cycles, - rst = (self._dly_sel.storage[bit//8] & self._wdly_dq_bitslip_rst.re) | self._rst.storage, - slp = self._dly_sel.storage[bit//8] & self._wdly_dq_bitslip.re, + rst = (self._dly_sel.storage[bit] & self._wdly_dq_bitslip_rst.re) | self._rst.storage, + slp = self._dly_sel.storage[bit] & self._wdly_dq_bitslip.re, i = dqs_pattern.o, o = self.ck_dqs_o[bit], ) @@ -268,10 +269,21 @@ class LPDDR4PHY(Module, AutoCSR): # DMI signal is used for Data Mask or Data Bus Invertion depending on Mode Registers values. # With DM and DBI disabled, this signal is a Don't Care. # With DM enabled, masking is performed only when the command used is WRITE-MASKED. - # TODO: use WRITE-MASKED for all write commands, and configure Mode Registers for that - # during DRAM initialization (we don't want to support DBI). - for bin in range(self.databits//8): - self.comb += self.ck_dmi_o[bit].eq(0) + # We don't support DBI, DM support is configured statically with `masked_write`. + for bit in range(self.databits//8): + if not masked_write: + self.comb += self.ck_dmi_o[bit].eq(0) + self.comb += self.dmi_oe.eq(0) + else: + self.comb += self.dmi_oe.eq(self.dq_oe) + self.submodules += BitSlip( + dw = 2*nphases, + cycles = bitslip_cycles, + rst = (self._dly_sel.storage[bit] & self._wdly_dq_bitslip_rst.re) | self._rst.storage, + slp = self._dly_sel.storage[bit] & self._wdly_dq_bitslip.re, + i = Cat(*[self.dfi.phases[i//2] .wrdata_mask[i%2 * self.databits//8 + bit] for i in range(2*nphases)]), + o = self.ck_dmi_o[bit], + ) # Read Control Path ------------------------------------------------------------------------ # Creates a delay line of read commands coming from the DFI interface. The output is used to diff --git a/litedram/phy/lpddr4/commands.py b/litedram/phy/lpddr4/commands.py index aa04db0..8b4ac78 100644 --- a/litedram/phy/lpddr4/commands.py +++ b/litedram/phy/lpddr4/commands.py @@ -30,7 +30,7 @@ class DFIPhaseAdapter(Module): are then counted starting from CS low on the 4th cycle. """ - def __init__(self, dfi_phase): + def __init__(self, dfi_phase, masked_write=True): # CS/CA values for 4 SDR cycles self.cs = Signal(4) self.ca = Array([Signal(6) for _ in range(4)]) @@ -65,11 +65,12 @@ class DFIPhaseAdapter(Module): def cmds(cmd1, cmd2, valid=1): return self.cmd1.set(cmd1) + self.cmd2.set(cmd2) + [self.valid.eq(valid)] + write1 = "MASK WRITE-1" if masked_write else "WRITE-1" self.comb += If(dfi_phase.cs_n == 0, # require dfi.cs_n Case(dfi_cmd, { _cmd["ACT"]: cmds("ACTIVATE-1", "ACTIVATE-2"), _cmd["RD"]: cmds("READ-1", "CAS-2"), - _cmd["WR"]: cmds("WRITE-1", "CAS-2"), # TODO: masked write + _cmd["WR"]: cmds(write1, "CAS-2"), _cmd["PRE"]: cmds("DESELECT", "PRECHARGE"), _cmd["REF"]: cmds("DESELECT", "REFRESH"), _cmd["ZQC"]: cmds("DESELECT", "MPC"), diff --git a/litedram/phy/lpddr4/sim.py b/litedram/phy/lpddr4/sim.py index 0ee9883..3217a7a 100644 --- a/litedram/phy/lpddr4/sim.py +++ b/litedram/phy/lpddr4/sim.py @@ -518,8 +518,8 @@ class DQWrite(DQBurst): NextValue(masked, self.masked), ], ops = [ - self.log.debug("WRITE[%d]: bank=%d, row=%d, col=%d, data=0x%04x", - self.burst_counter, bank, row, self.col_burst, dq, once=False), + self.log.debug("WRITE[%d]: bank=%d, row=%d, col=%d, dq=0x%04x dm=0x%02b", + self.burst_counter, bank, row, self.col_burst, dq, dmi, once=False), If(masked, ports[bank].we.eq(~dmi), # DMI high masks the beat ).Else( @@ -534,7 +534,7 @@ class DQRead(DQBurst): def __init__(self, *, dq, ports, nrows, ncols, bank, row, col, **kwargs): super().__init__(nrows=nrows, ncols=ncols, row=row, col=col, **kwargs) self.add_fsm([ - self.log.debug("READ[%d]: bank=%d, row=%d, col=%d, data=0x%04x", + self.log.debug("READ[%d]: bank=%d, row=%d, col=%d, dq=0x%04x", self.burst_counter, bank, row, self.col_burst, dq, once=False), ports[bank].we.eq(0), ports[bank].adr.eq(self.addr), diff --git a/litedram/phy/lpddr4/simphy.py b/litedram/phy/lpddr4/simphy.py index 8258589..8248b3b 100644 --- a/litedram/phy/lpddr4/simphy.py +++ b/litedram/phy/lpddr4/simphy.py @@ -36,14 +36,15 @@ class LPDDR4SimulationPads(Module): class LPDDR4SimPHY(LPDDR4PHY): - def __init__(self, sys_clk_freq=100e6, aligned_reset_zero=False): + def __init__(self, sys_clk_freq=100e6, aligned_reset_zero=False, **kwargs): pads = LPDDR4SimulationPads() self.submodules += pads super().__init__(pads, - sys_clk_freq = sys_clk_freq, - write_ser_latency = Serializer.LATENCY, - read_des_latency = Deserializer.LATENCY, - phytype = "LPDDR4SimPHY") + sys_clk_freq = sys_clk_freq, + write_ser_latency = Serializer.LATENCY, + read_des_latency = Deserializer.LATENCY, + phytype = "LPDDR4SimPHY", + **kwargs) def add_reset_value(phase, kwargs): if aligned_reset_zero and phase == 0: diff --git a/litedram/phy/lpddr4/simsoc.py b/litedram/phy/lpddr4/simsoc.py index e503ff3..d74529b 100644 --- a/litedram/phy/lpddr4/simsoc.py +++ b/litedram/phy/lpddr4/simsoc.py @@ -64,7 +64,6 @@ class Clocks(dict): # FORMAT: {name: {"freq_hz": _, "phase_deg": _}, ...} def add_io(self, io): for name in self.names(): - print((name + "_clk", 0, Pins(1))) io.append((name + "_clk", 0, Pins(1))) def add_clockers(self, sim_config): @@ -105,7 +104,7 @@ def get_clocks(sys_clk_freq): class SimSoC(SoCCore): def __init__(self, clocks, log_level, auto_precharge=False, with_refresh=True, trace_reset=0, - disable_delay=False, **kwargs): + disable_delay=False, masked_write=True, **kwargs): platform = Platform() sys_clk_freq = clocks["sys"]["freq_hz"] @@ -126,7 +125,11 @@ class SimSoC(SoCCore): # LPDDR4 ----------------------------------------------------------------------------------- sdram_module = litedram_modules.MT53E256M16D1(sys_clk_freq, "1:8") pads = platform.request("lpddr4") - self.submodules.ddrphy = LPDDR4SimPHY(sys_clk_freq=sys_clk_freq, aligned_reset_zero=True) + self.submodules.ddrphy = LPDDR4SimPHY( + sys_clk_freq = sys_clk_freq, + aligned_reset_zero = True, + masked_write = masked_write, + ) # fake delays (make no nsense in simulation, but sdram.c expects them) self.ddrphy._rdly_dq_rst = CSR() self.ddrphy._rdly_dq_inc = CSR() @@ -443,6 +446,11 @@ def generate_gtkw_savefile(builder, vns, trace_fst): filter = regex_filter(suffixes2re(["wrdata"])), sorter = dfi_sorter(), colorer = dfi_per_phase_colorer()) + gtkw.add(soc.ddrphy.dfi, + group_name = "dfi wrdata_mask", + filter = regex_filter(suffixes2re(["wrdata_mask"])), + sorter = dfi_sorter(), + colorer = dfi_per_phase_colorer()) gtkw.add(soc.ddrphy.dfi, group_name = "dfi rddata", filter = regex_filter(suffixes2re(["rddata"])), @@ -475,6 +483,8 @@ def main(): parser.add_argument("--log-level", default="all=INFO", help="Set simulation logging level") parser.add_argument("--disable-delay", action="store_true", help="Disable CPU delays") parser.add_argument("--gtkw-savefile", action="store_true", help="Generate GTKWave savefile") + parser.add_argument("--no-masked-write", action="store_true", help="Use LPDDR4 WRITE instead of MASKED-WRITE") + parser.add_argument("--no-run", action="store_true", help="Don't run the simulation, just generate files") args = parser.parse_args() soc_kwargs = soc_sdram_argdict(args) @@ -503,6 +513,7 @@ def main(): trace_reset = int(args.trace_reset), log_level = args.log_level, disable_delay = args.disable_delay, + masked_write = not args.no_masked_write, **soc_kwargs) # Build/Run ------------------------------------------------------------------------------------ @@ -520,7 +531,8 @@ def main(): if args.gtkw_savefile: generate_gtkw_savefile(builder, vns, trace_fst=args.trace_fst) - builder.build(build=False, **build_kwargs) + if not args.no_run: + builder.build(build=False, **build_kwargs) if __name__ == "__main__": main() diff --git a/test/test_lpddr4.py b/test/test_lpddr4.py index a8aa348..1a79372 100644 --- a/test/test_lpddr4.py +++ b/test/test_lpddr4.py @@ -508,25 +508,28 @@ class TestLPDDR4(unittest.TestCase): mrw = dict(cs_n=0, cas_n=0, ras_n=0, we_n=0, bank=0, address=(0b110011 << 8) | 0b10101010) # 6-bit address | 8-bit op code zqc_start = dict(cs_n=0, cas_n=1, ras_n=1, we_n=0, bank=0, address=0b1001111) # MPC with ZQCAL START operand zqc_latch = dict(cs_n=0, cas_n=1, ras_n=1, we_n=0, bank=0, address=0b1010001) # MPC with ZQCAL LATCH operand - self.run_test(LPDDR4SimPHY(), - dfi_sequence = [ - {0: read, 4: write_ap}, - {0: activate, 4: refresh_ab}, - {0: precharge, 4: mrw}, - {0: zqc_start, 4: zqc_latch}, - ], - pad_checkers = {"sys8x_90": { - # note that refresh and precharge have a single command so these go as cmd2 - # rd wr act ref pre mrw zqcs zqcl - 'cs': latency + '1010'+'1010' + '1010'+'0010' + '0010'+'1010' + '0010'+'0010', - 'ca0': latency + '0100'+'0100' + '1011'+'0000' + '0001'+'0100' + '0001'+'0001', - 'ca1': latency + '1010'+'0110' + '0110'+'0000' + '0001'+'1111' + '0001'+'0000', - 'ca2': latency + '0101'+'1100' + '0010'+'0001' + '0000'+'1010' + '0001'+'0000', - 'ca3': latency + '0x01'+'0x00' + '1110'+'001x' + '000x'+'0001' + '0001'+'0000', - 'ca4': latency + '0110'+'0010' + '1010'+'000x' + '001x'+'0110' + '0000'+'0001', - 'ca5': latency + '0010'+'0100' + '1001'+'001x' + '000x'+'1101' + '0010'+'0010', - }}, - ) + for masked_write in [True, False]: + with self.subTest(masked_write=masked_write): + wr_ca3 = '{}x00'.format('0' if not masked_write else '1') + self.run_test(LPDDR4SimPHY(masked_write=masked_write), + dfi_sequence = [ + {0: read, 4: write_ap}, + {0: activate, 4: refresh_ab}, + {0: precharge, 4: mrw}, + {0: zqc_start, 4: zqc_latch}, + ], + pad_checkers = {"sys8x_90": { + # note that refresh and precharge have a single command so these go as cmd2 + # rd wr act ref pre mrw zqcs zqcl + 'cs': latency + '1010'+'1010' + '1010'+'0010' + '0010'+'1010' + '0010'+'0010', + 'ca0': latency + '0100'+'0100' + '1011'+'0000' + '0001'+'0100' + '0001'+'0001', + 'ca1': latency + '1010'+'0110' + '0110'+'0000' + '0001'+'1111' + '0001'+'0000', + 'ca2': latency + '0101'+'1100' + '0010'+'0001' + '0000'+'1010' + '0001'+'0000', + 'ca3': latency + '0x01'+wr_ca3 + '1110'+'001x' + '000x'+'0001' + '0001'+'0000', + 'ca4': latency + '0110'+'0010' + '1010'+'000x' + '001x'+'0110' + '0000'+'0001', + 'ca5': latency + '0010'+'0100' + '1001'+'001x' + '000x'+'1101' + '0010'+'0010', + }}, + ) def test_lpddr4_command_pads(self): # Test serialization of DFI command pins (cs/cke/odt/reset_n) @@ -720,53 +723,56 @@ class TestLPDDR4(unittest.TestCase): def test_lpddr4_cmd_write(self): # Test whole WRITE command sequence verifying data on pads and write_latency from MC perspective - phy = LPDDR4SimPHY() - zero = '00000000' * 2 - write_latency = phy.settings.write_latency - wrphase = phy.settings.wrphase.reset.value + for masked_write in [True, False]: + with self.subTest(masked_write=masked_write): + phy = LPDDR4SimPHY(masked_write=masked_write) + zero = '00000000' * 2 + write_latency = phy.settings.write_latency + wrphase = phy.settings.wrphase.reset.value - dfi_data = { - 0: dict(wrdata=0x11112222), - 1: dict(wrdata=0x33334444), - 2: dict(wrdata=0x55556666), - 3: dict(wrdata=0x77778888), - 4: dict(wrdata=0x9999aaaa), - 5: dict(wrdata=0xbbbbcccc), - 6: dict(wrdata=0xddddeeee), - 7: dict(wrdata=0xffff0000), - } - dfi_sequence = [ - {wrphase: dict(cs_n=0, cas_n=0, ras_n=1, we_n=0, wrdata_en=1)}, - *[{} for _ in range(write_latency - 1)], - dfi_data, - {}, - {}, - {}, - {}, - {}, - ] + dfi_data = { + 0: dict(wrdata=0x11112222), + 1: dict(wrdata=0x33334444), + 2: dict(wrdata=0x55556666), + 3: dict(wrdata=0x77778888), + 4: dict(wrdata=0x9999aaaa), + 5: dict(wrdata=0xbbbbcccc), + 6: dict(wrdata=0xddddeeee), + 7: dict(wrdata=0xffff0000), + } + dfi_sequence = [ + {wrphase: dict(cs_n=0, cas_n=0, ras_n=1, we_n=0, wrdata_en=1)}, + *[{} for _ in range(write_latency - 1)], + dfi_data, + {}, + {}, + {}, + {}, + {}, + ] - self.run_test(phy, - dfi_sequence = dfi_sequence, - pad_checkers = { - "sys8x_90": { - "cs": "00000000"*2 + "00001010" + "00000000"*2, - "ca0": "00000000"*2 + "00000000" + "00000000"*2, - "ca1": "00000000"*2 + "00000010" + "00000000"*2, - "ca2": "00000000"*2 + "00001000" + "00000000"*2, - "ca3": "00000000"*2 + "00000000" + "00000000"*2, - "ca4": "00000000"*2 + "00000010" + "00000000"*2, - "ca5": "00000000"*2 + "00000000" + "00000000"*2, - }, - "sys8x_90_ddr": { - f'dq{i}': (self.CMD_LATENCY+1)*zero + zero + dq_pattern(i, dfi_data, "wrdata") + zero - for i in range(16) - }, - "sys8x_ddr": { - "dqs0": (self.CMD_LATENCY+1)*zero + '01010101'+'01010100' + '01010101'+'01010101' + '00010101'+'01010101' + zero, - }, - }, - ) + wr_ca3 = "0000{}000".format('0' if not masked_write else '1') + self.run_test(phy, + dfi_sequence = dfi_sequence, + pad_checkers = { + "sys8x_90": { + "cs": "00000000"*2 + "00001010" + "00000000"*2, + "ca0": "00000000"*2 + "00000000" + "00000000"*2, + "ca1": "00000000"*2 + "00000010" + "00000000"*2, + "ca2": "00000000"*2 + "00001000" + "00000000"*2, + "ca3": "00000000"*2 + wr_ca3 + "00000000"*2, + "ca4": "00000000"*2 + "00000010" + "00000000"*2, + "ca5": "00000000"*2 + "00000000" + "00000000"*2, + }, + "sys8x_90_ddr": { + f'dq{i}': (self.CMD_LATENCY+1)*zero + zero + dq_pattern(i, dfi_data, "wrdata") + zero + for i in range(16) + }, + "sys8x_ddr": { + "dqs0": (self.CMD_LATENCY+1)*zero + '01010101'+'01010100' + '01010101'+'01010101' + '00010101'+'01010101' + zero, + }, + }, + ) def test_lpddr4_cmd_read(self): # Test whole READ command sequence simulating DRAM response and verifying read_latency from MC perspective