soc/cores/ram: allow populating initial values in Nexus LRAM

On designs which use Nexus parts without any external memory, it can be difficult to fit an embedded ROM program larger than a few KiB. Radiant cannot infer LRAM, and refuses to infer EBRAM under many circumstances too, so large memories tend to just consume a huge number of LUTs. This patch makes it possible to explicitly wire up an LRAM as a ROM, populate its initial values with a program, and execute directly from it. That lets us embed programs up to 64KiB.
2021-06-11 16:24:29 +10:00 · 2021-06-11 16:24:29 +10:00 · be4c7cfb34
parent 6b8a35a2f8
commit be4c7cfb34
1 changed files with 50 additions and 8 deletions
--- a/litex/soc/cores/ram/lattice_nx.py
+++ b/litex/soc/cores/ram/lattice_nx.py
@ -21,37 +21,68 @@ Note that this memory is dual port, but we only use a single port in this
 instantiation.
 """

+
+def initval_parameters(contents, width):
+    """
+    Build the INITVAL_00 ... INITVAL_7F parameters Radiant uses to preload
+    one LRAM. `contents` holds exactly 524288 // width words of `width`
+    (32 or 64) bits. Each parameter carries 4096 bits of data as a
+    1280-digit hexadecimal number, highest word first, in which every 32
+    bits of real data are preceded by 8 bits of zero padding.
+    """
+    assert width in [32, 64]
+    # Each LRAM is 64KiB == 524288 bits
+    assert len(contents) == 524288 // width
+    chunk_size = 4096 // width
+    parameters = []
+    for i in range(0x80):
+        name = 'INITVAL_{:02X}'.format(i)
+        chunk = contents[chunk_size * i:chunk_size * (i + 1)]
+        if width == 32:
+            value = '0x' + ''.join('00{:08X}'.format(word) for word in reversed(chunk))
+        elif width == 64:
+            # Emit the high half, then the low half; AND with a 32-bit mask selects the low half.
+            value = '0x' + ''.join('00{:08X}00{:08X}'.format(word >> 32, word & 0xFFFFFFFF)
+                                   for word in reversed(chunk))
+        parameters.append(Instance.Parameter(name, value))
+    return parameters
+
+
 class NXLRAM(Module):
    def __init__(self, width=32, size=128*kB):
        self.bus = wishbone.Interface(width)
        assert width in [32, 64]
+        self.width = width
+        self.size = size

        if width == 32:
            assert size in [64*kB, 128*kB, 192*kB, 256*kB, 320*kB]
-            depth_cascading = size//(64*kB)
-            width_cascading = 1
+            self.depth_cascading = size//(64*kB)
+            self.width_cascading = 1
        if width == 64:
            assert size in [128*kB, 256*kB]
-            depth_cascading = size//(128*kB)
-            width_cascading = 2
+            self.depth_cascading = size//(128*kB)
+            self.width_cascading = 2

+        self.lram_blocks = []
        # Combine RAMs to increase Depth.
-        for d in range(depth_cascading):
+        for d in range(self.depth_cascading):
+            self.lram_blocks.append([])
            # Combine RAMs to increase Width.
-            for w in range(width_cascading):
+            for w in range(self.width_cascading):
                datain  = Signal(32)
                dataout = Signal(32)
                cs      = Signal()
                wren    = Signal()
                self.comb += [
                    datain.eq(self.bus.dat_w[32*w:32*(w+1)]),
-                    If(self.bus.adr[14:14+depth_cascading.bit_length()] == d,
+                    If(self.bus.adr[14:14+self.depth_cascading.bit_length()] == d,
                        cs.eq(1),
                        wren.eq(self.bus.we & self.bus.stb & self.bus.cyc),
                        self.bus.dat_r[32*w:32*(w+1)].eq(dataout)
                    ),
                ]
-                self.specials += Instance("SP512K",
+                lram_block = Instance("SP512K",
                    p_ECC_BYTE_SEL = "BYTE_EN",
                    i_DI       = datain,
                    i_AD       = self.bus.adr[:14],
@ -64,5 +95,16 @@ class NXLRAM(Module):
                    i_BYTEEN_N = ~self.bus.sel[4*w:4*(w+1)],
                    o_DO       = dataout
                )
+                self.lram_blocks[d].append(lram_block)
+                self.specials += lram_block

        self.sync += self.bus.ack.eq(self.bus.stb & self.bus.cyc & ~self.bus.ack)
+
+    def add_initial_value(self, data):
+        """
+        Preload the LRAM blocks with `data`, a sequence of bus words
+        (`self.width` bits each, lowest address first). Data shorter than the
+        memory is zero-padded; the caller's sequence is left unmodified.
+
+        Block (d, w) is addressed by the low 14 bits of the bus address, is
+        selected when the address bits above them equal d, and is wired to
+        bits [32*w, 32*(w+1)) of the data bus. It therefore holds that 32-bit
+        lane of bus words d*16384 ... (d+1)*16384 - 1.
+
+        Raises ValueError if `data` has more words than the memory can hold.
+        """
+        words_per_block = 1 << 14
+        total_words = self.depth_cascading * words_per_block
+        if len(data) > total_words:
+            raise ValueError("initial value has {} words but the LRAM only holds {}".format(
+                len(data), total_words))
+        # Pad a copy out to the full depth to make slicing easier below.
+        padded = list(data) + [0] * (total_words - len(data))
+        for d in range(self.depth_cascading):
+            words = padded[d * words_per_block:(d + 1) * words_per_block]
+            for w in range(self.width_cascading):
+                lane = [(word >> (32 * w)) & 0xFFFFFFFF for word in words]
+                # Every block is a 32-bit wide LRAM, whatever the bus width.
+                self.lram_blocks[d][w].items += initval_parameters(lane, 32)