From abc77f367c7c61b52d119d40031f5b68aea95ae5 Mon Sep 17 00:00:00 2001
From: Alessandro Comodi <acomodi@antmicro.com>
Date: Wed, 1 Sep 2021 18:00:51 +0200
Subject: [PATCH] lpddr5: wck sync: fix syncing and adjust unit tests

Signed-off-by: Alessandro Comodi <acomodi@antmicro.com>
---
 litedram/init.py               |  4 +-
 litedram/phy/lpddr5/basephy.py | 36 +++++++++++++----
 test/test_lpddr5.py            | 74 +++++++++++++++++++++++++++++-----
 3 files changed, 93 insertions(+), 21 deletions(-)

diff --git a/litedram/init.py b/litedram/init.py
index 1269436..8404869 100644
--- a/litedram/init.py
+++ b/litedram/init.py
@@ -704,8 +704,8 @@ def get_lpddr5_phy_init_sequence(phy_settings, timing_settings):
     mr[18] = reg([
         (0, 3, rzq_map[wck_odt]),
         (3, 1, 0),  # WCK low frequency mode
-        (4, 1, 0),  # WCK always on mode enabled
-        (6, 1, 0),  # WCK2CK leveling diabled
+        (4, 1, 0),  # WCK always on mode disabled
+        (6, 1, 0),  # WCK2CK leveling disabled
         (7, 1, {2: 1, 4: 0}[wck_ck_ratio]),
     ])
     # MR19 - defaults
diff --git a/litedram/phy/lpddr5/basephy.py b/litedram/phy/lpddr5/basephy.py
index 286a2f9..166a5fb 100644
--- a/litedram/phy/lpddr5/basephy.py
+++ b/litedram/phy/lpddr5/basephy.py
@@ -303,10 +303,12 @@ class LPDDR5PHY(Module, AutoCSR):
 
         wck_pattern = Signal(8)  # for WCK:CK=2:1 we take wck_pattern[::2]
         patterns = {
-            "disabled":   "________",
-            "static":     "________",
-            "toggle":     "--__--__",
-            "toggle_4:1": "-_-_-_-_",
+            "disabled":      "________",
+            "static":        "________",
+            "toggle":        "--__--__",
+            "toggle_4:1":    "-_-_-_-_",
+            "postamble":     "--______",
+            "postamble_4:1": "-_-_-___",
         }
 
         assert frange.t_wckpre_static > 0  # The algorithm assumes it's never 0
@@ -332,7 +334,7 @@ class LPDDR5PHY(Module, AutoCSR):
         wck_fsm.act("TOGGLE",
             wck_pattern.eq(bitpattern(patterns["toggle"])),
             If(~wck_sync_done,
-                NextState("DISABLED")
+                NextState("POSTAMBLE")
             ).Elif((wck_ck_ratio == 4),  # go to full speed in the next cycle
                 NextState("TOGGLE_4:1")
             ),
@@ -340,9 +342,22 @@ class LPDDR5PHY(Module, AutoCSR):
         wck_fsm.act("TOGGLE_4:1",
             wck_pattern.eq(bitpattern(patterns["toggle_4:1"])),
             If(~wck_sync_done,
-                NextState("DISABLED")
+                NextState("POSTAMBLE")
             ),
         )
+        wck_fsm.act("POSTAMBLE",
+            If((wck_ck_ratio == 4),
+                wck_pattern.eq(bitpattern(patterns["postamble_4:1"])),
+                NextState("DISABLED")
+            ).Else(
+                wck_pattern.eq(bitpattern(patterns["toggle"])),
+                NextState("POSTAMBLE_2:1")
+            )
+        )
+        wck_fsm.act("POSTAMBLE_2:1",
+            wck_pattern.eq(bitpattern(patterns["postamble"])),
+            NextState("DISABLED")
+        )
 
         wck_out = {2: wck_pattern[::2], 4: wck_pattern}[wck_ck_ratio]
         assert len(wck_out) == len(self.out.wck[0]), (len(wck_out), len(self.out.wck))
@@ -424,17 +439,22 @@ class LPDDR5PHY(Module, AutoCSR):
             self.dfi.p0.rddata_valid.eq(rddata_converter.source.valid),
         ]
 
+
+        # The -2 accounts for the WCK serialization time
+        read_wck_latency = cmd_latency + cl + burst_ck_cycles - 2
+        write_wck_latency = cmd_latency + cwl + burst_ck_cycles - 2
+
         self.wck_sync_state = Signal(2)
         self.sync += If(self.adapter.wck_sync != 0,
             wck_sync_done.eq(1),
             self.wck_sync_state.eq(self.adapter.wck_sync)
         ).Elif(self.wck_sync_state == WCKSyncType.RD,
-            If(reduce(or_, rddata_en.taps[0:rddata_start+burst_ck_cycles]) == 0,
+            If(reduce(or_, rddata_en.taps[0:read_wck_latency]) == 0,
                 wck_sync_done.eq(0),
                 self.wck_sync_state.eq(0b00),
             )
         ).Elif(self.wck_sync_state == WCKSyncType.WR,
-            If(reduce(or_, wrdata_en.taps[0:wrtap+burst_ck_cycles]) == 0,
+            If(reduce(or_, wrdata_en.taps[0:write_wck_latency]) == 0,
                 wck_sync_done.eq(0),
                 self.wck_sync_state.eq(0b00),
             )
diff --git a/test/test_lpddr5.py b/test/test_lpddr5.py
index e27250c..b61bf89 100644
--- a/test/test_lpddr5.py
+++ b/test/test_lpddr5.py
@@ -185,7 +185,7 @@ class LPDDR5Tests(unittest.TestCase):
                 read,  # with WCK sync
                 {},
                 {},
-                read,  # no WCK sync
+                read,  # with WCK sync
                 {},
                 {},
             ],
@@ -215,7 +215,7 @@ class LPDDR5Tests(unittest.TestCase):
                 mrr,  # with WCK sync
                 {},
                 {},
-                mrr,  # no WCK sync
+                mrr,  # with WCK sync
                 {},
                 {},
             ],
@@ -249,7 +249,7 @@ class LPDDR5Tests(unittest.TestCase):
                         write,  # with WCK sync
                         {},
                         {},
-                        write,  # no WCK sync
+                        write,  # with WCK sync
                         {},
                         {},
                     ],
@@ -291,12 +291,12 @@ class LPDDR5Tests(unittest.TestCase):
                     dfi_sequence = [
                         {},
                         {0: read}, {},  # WCK sync
-                        {0: write_ap}, {},  # no WCK sync
+                        {0: write_ap}, {},  # with WCK sync
                         {0: activate}, {},
                         {0: refresh_ab},{},
                         {0: precharge}, {},
                         {0: mrw}, {},
-                        {0: mrr}, {},  # no WCK sync
+                        {0: mrr}, {},  # with WCK sync
                         {0: zqc_start}, {},
                         {0: zqc_latch}, {},
                     ],
@@ -497,12 +497,31 @@ class LPDDR5Tests(unittest.TestCase):
                     0: dict(wrdata=0xfffefffffffefffffffefffffffefffffffefffffffefffffffefffffffeffff),
                 }
                 latency = [{}] * (wl - 1)
+
+                # minimum gap between consecutive bursts needed for correct WCK synchronization
+                consecutive_burst_latency = [{}] * 6
+
                 wck_preamble = "00 00" * t["t_wckenl_wr"] + "00 00" * t["t_wckenl_static"] + "10 10" * t["t_wckenl_toggle_wr"]
+                wck_burst = "10 10" * (16//4)
+                wck_postamble = "10 10" + "10 00"
+
                 self.run_test(phy,
                     dfi_sequence = [
                         {0: dict(cs_n=0, cas_n=0, ras_n=1, we_n=0, wrdata_en=1)},
                         *latency,
-                        dfi_data
+                        dfi_data,
+                        *consecutive_burst_latency,
+                        {0: dict(cs_n=0, cas_n=0, ras_n=1, we_n=0, wrdata_en=1)},
+                        *latency,
+                        dfi_data,
+                        *consecutive_burst_latency,
+                        {0: dict(cs_n=0, cas_n=0, ras_n=1, we_n=0, wrdata_en=1)},
+                        *latency,
+                        dfi_data,
+                        *consecutive_burst_latency,
+                        {0: dict(cs_n=0, cas_n=0, ras_n=1, we_n=0, wrdata_en=1)},
+                        *latency,
+                        dfi_data,
                     ],
                     pad_checkers = {
                         "sys_270": {
@@ -516,7 +535,10 @@ class LPDDR5Tests(unittest.TestCase):
                             # tWCKENL_WR starts counting from first command (CAS) so we add command latency,
                             # then preamble, then toggle for the whole burst, then postamble for tWCKPST=2.5tCK
                             # (but for now we assume that WCK is never disabled)
-                            "wck0": "0000 0000" + wck_preamble + "10 10" * (16//4) + "10 10 1" + "0 10" + "00 00"*2,
+                            "wck0": "0000 0000" + wck_preamble + wck_burst + wck_postamble + \
+                                    "0000" + wck_preamble + wck_burst + wck_postamble + \
+                                    "0000" + wck_preamble + wck_burst + wck_postamble + \
+                                    "0000" + wck_preamble + wck_burst + wck_postamble,
                         },
                     },
                     chunk_size=4,
@@ -540,12 +562,31 @@ class LPDDR5Tests(unittest.TestCase):
                     0: dict(wrdata=0xfffefffffffefffffffefffffffefffffffefffffffefffffffefffffffeffff),
                 }
                 latency = [{}] * (wl - 1)
+
+                # minimum gap between consecutive bursts needed for correct WCK synchronization
+                consecutive_burst_latency = [{}] * 3
+
                 wck_preamble = "00000000" * (t["t_wckenl_wr"] + t["t_wckenl_static"]) + "11001100" + "10101010" * (t["t_wckenl_toggle_wr"] - 1)
+                wck_burst = "10101010" * (16//8)
+                wck_postamble = "10101000"
+
                 self.run_test(phy,
                     dfi_sequence = [
                         {0: dict(cs_n=0, cas_n=0, ras_n=1, we_n=0, wrdata_en=1)},
                         *latency,
-                        dfi_data
+                        dfi_data,
+                        *consecutive_burst_latency,
+                        {0: dict(cs_n=0, cas_n=0, ras_n=1, we_n=0, wrdata_en=1)},
+                        *latency,
+                        dfi_data,
+                        *consecutive_burst_latency,
+                        {0: dict(cs_n=0, cas_n=0, ras_n=1, we_n=0, wrdata_en=1)},
+                        *latency,
+                        dfi_data,
+                        *consecutive_burst_latency,
+                        {0: dict(cs_n=0, cas_n=0, ras_n=1, we_n=0, wrdata_en=1)},
+                        *latency,
+                        dfi_data,
                     ],
                     pad_checkers = {
                         "sys_270": {
@@ -559,7 +600,10 @@ class LPDDR5Tests(unittest.TestCase):
                             # tWCKENL_WR starts counting from first command (CAS) so we add command latency,
                             # then preamble, then toggle for the whole burst, then postamble for tWCKPST=2.5tCK
                             # (but for now we assume that WCK is never disabled)
-                            "wck0": "00000000 00000000" + wck_preamble + "10101010" * (16//8) + "10101" + "0 10" + "10 10"*2,
+                            "wck0": "00000000 00000000" + wck_preamble + wck_burst + wck_postamble + \
+                                    "00000000" + wck_preamble + wck_burst + wck_postamble + \
+                                    "00000000" + wck_preamble + wck_burst + wck_postamble + \
+                                    "00000000" + wck_preamble + wck_burst + wck_postamble,
                         },
                     },
                 )
@@ -579,7 +623,11 @@ class LPDDR5Tests(unittest.TestCase):
                 phy = LPDDR5SimPHY(sys_clk_freq=sys_clk_freq)
                 rl = phy.settings.read_latency
                 latency = [{}] * (rl - 1)
+
                 wck_preamble = "00 00" * t["t_wckenl_rd"] + "00 00" * t["t_wckenl_static"] + "10 10" * t["t_wckenl_toggle_rd"]
+                wck_burst = "10 10" * (16//4)
+                wck_postamble = "10 10" + "10 00"
+
                 self.run_test(phy,
                     dfi_sequence = [
                         {0: dict(cs_n=0, cas_n=0, ras_n=1, we_n=1, rddata_en=1)},
@@ -591,7 +639,7 @@ class LPDDR5Tests(unittest.TestCase):
                             "cs": "01100000",
                         },
                         "sys4x_270": {
-                            "wck0": "0000 0000" + wck_preamble + "10 10" * (16//4) + "10 10 1" + "0 10" + "10 10"*2,
+                            "wck0": "0000 0000" + wck_preamble + wck_burst + wck_postamble + "00 00",
                         },
                     },
                     chunk_size=4,
@@ -612,7 +660,11 @@ class LPDDR5Tests(unittest.TestCase):
                 phy = LPDDR5SimPHY(sys_clk_freq=sys_clk_freq, wck_ck_ratio=4)
                 rl = phy.settings.read_latency
                 latency = [{}] * (rl - 1)
+
                 wck_preamble = "00000000" * (t["t_wckenl_rd"] + t["t_wckenl_static"]) + "11001100" + "10101010" * (t["t_wckenl_toggle_rd"] - 1)
+                wck_burst = "10101010" * (16//8)
+                wck_postamble = "10101000"
+
                 self.run_test(phy,
                     dfi_sequence = [
                         {0: dict(cs_n=0, cas_n=0, ras_n=1, we_n=1, rddata_en=1)},
@@ -624,7 +676,7 @@ class LPDDR5Tests(unittest.TestCase):
                             "cs": "01100000",
                         },
                         "sys8x_270": {
-                            "wck0": "00000000 00000000" + wck_preamble + "10101010" * (16//8) + "10101" + "0 10" + "10 10"*2,
+                            "wck0": "00000000 00000000" + wck_preamble + wck_burst + wck_postamble + "00000000",
                         },
                     },
                 )