soc/cores/ram: allow populating initial values in Nexus LRAM

On designs which use Nexus parts without any external memory, it can be
difficult to fit an embedded ROM program larger than a few KiB. Radiant
cannot infer LRAM, and refuses to infer EBRAM under many circumstances
too, so large memories tend to just consume a huge number of LUTs.

This patch makes it possible to explicitly wire up an LRAM as a ROM,
populate its initial values with a program, and execute directly from
it. That lets us embed programs up to 64KiB.
This commit is contained in:
Dan Callaghan 2021-06-11 16:24:29 +10:00
parent 6b8a35a2f8
commit be4c7cfb34
1 changed files with 50 additions and 8 deletions

View File

@ -21,37 +21,68 @@ Note that this memory is dual port, but we only use a single port in this
instantiation. instantiation.
""" """
def initval_parameters(contents, width):
    """
    Build the INITVAL_xx instance parameters for a single Nexus LRAM block.

    In Radiant, initial values for LRAM are passed as a sequence of parameters
    named INITVAL_00 ... INITVAL_7F. Each parameter value contains 4096 bits
    of data, encoded as a 1280-digit hexadecimal number, with
    alternating sequences of 8 bits of padding and 32 bits of real data,
    making up 64KiB altogether.

    Parameters
    ----------
    contents : sequence of int
        The block's initial contents, one non-negative integer per
        ``width``-bit word. Must hold exactly 64KiB worth of words
        (16384 entries for width 32, 8192 entries for width 64).
    width : int
        Word width of ``contents`` in bits; 32 or 64.

    Returns
    -------
    list of Instance.Parameter
        128 parameters, in order INITVAL_00 ... INITVAL_7F.
    """
    assert width in [32, 64]
    # Each LRAM is 64KiB == 524288 bits
    assert len(contents) == 524288 // width
    chunk_size = 4096 // width
    parameters = []
    for i in range(0x80):
        name = 'INITVAL_{:02X}'.format(i)
        offset = chunk_size * i
        fields = []
        # The highest-indexed word of the chunk is emitted first, so it ends
        # up in the most significant digits of the hexadecimal number.
        for j in range(chunk_size - 1, -1, -1):
            word = contents[offset + j]
            if width == 32:
                fields.append('00{:08X}'.format(word))
            else:
                # Split the 64-bit word into two padded 32-bit fields, upper
                # half first. The lower half must be *masked* to 32 bits;
                # OR-ing with a constant would force bits to 1 and overflow
                # the 8-digit field.
                fields.append('00{:08X}00{:08X}'.format(word >> 32, word & 0xFFFFFFFF))
        value = '0x' + ''.join(fields)
        parameters.append(Instance.Parameter(name, value))
    return parameters
class NXLRAM(Module): class NXLRAM(Module):
def __init__(self, width=32, size=128*kB): def __init__(self, width=32, size=128*kB):
self.bus = wishbone.Interface(width) self.bus = wishbone.Interface(width)
assert width in [32, 64] assert width in [32, 64]
self.width = width
self.size = size
if width == 32: if width == 32:
assert size in [64*kB, 128*kB, 192*kB, 256*kB, 320*kB] assert size in [64*kB, 128*kB, 192*kB, 256*kB, 320*kB]
depth_cascading = size//(64*kB) self.depth_cascading = size//(64*kB)
width_cascading = 1 self.width_cascading = 1
if width == 64: if width == 64:
assert size in [128*kB, 256*kB] assert size in [128*kB, 256*kB]
depth_cascading = size//(128*kB) self.depth_cascading = size//(128*kB)
width_cascading = 2 self.width_cascading = 2
self.lram_blocks = []
# Combine RAMs to increase Depth. # Combine RAMs to increase Depth.
for d in range(depth_cascading): for d in range(self.depth_cascading):
self.lram_blocks.append([])
# Combine RAMs to increase Width. # Combine RAMs to increase Width.
for w in range(width_cascading): for w in range(self.width_cascading):
datain = Signal(32) datain = Signal(32)
dataout = Signal(32) dataout = Signal(32)
cs = Signal() cs = Signal()
wren = Signal() wren = Signal()
self.comb += [ self.comb += [
datain.eq(self.bus.dat_w[32*w:32*(w+1)]), datain.eq(self.bus.dat_w[32*w:32*(w+1)]),
If(self.bus.adr[14:14+depth_cascading.bit_length()] == d, If(self.bus.adr[14:14+self.depth_cascading.bit_length()] == d,
cs.eq(1), cs.eq(1),
wren.eq(self.bus.we & self.bus.stb & self.bus.cyc), wren.eq(self.bus.we & self.bus.stb & self.bus.cyc),
self.bus.dat_r[32*w:32*(w+1)].eq(dataout) self.bus.dat_r[32*w:32*(w+1)].eq(dataout)
), ),
] ]
self.specials += Instance("SP512K", lram_block = Instance("SP512K",
p_ECC_BYTE_SEL = "BYTE_EN", p_ECC_BYTE_SEL = "BYTE_EN",
i_DI = datain, i_DI = datain,
i_AD = self.bus.adr[:14], i_AD = self.bus.adr[:14],
@ -64,5 +95,16 @@ class NXLRAM(Module):
i_BYTEEN_N = ~self.bus.sel[4*w:4*(w+1)], i_BYTEEN_N = ~self.bus.sel[4*w:4*(w+1)],
o_DO = dataout o_DO = dataout
) )
self.lram_blocks[d].append(lram_block)
self.specials += lram_block
self.sync += self.bus.ack.eq(self.bus.stb & self.bus.cyc & ~self.bus.ack) self.sync += self.bus.ack.eq(self.bus.stb & self.bus.cyc & ~self.bus.ack)
def add_initial_value(self, data):
    """
    Attach initial contents to the LRAM blocks of this memory.

    ``data`` is a sequence of integers, one per ``self.width``-bit word,
    starting at word 0. It may be shorter than the memory; the remainder is
    zero-filled. The caller's sequence is not modified.

    Raises ValueError if ``data`` holds more words than the memory.
    """
    total_words = self.size * 8 // self.width
    if len(data) > total_words:
        raise ValueError(
            "initial value has {} words but the memory only holds {}".format(
                len(data), total_words))
    # Pad a copy out to the full memory size to make slicing easier below
    # (padding in place would silently grow the caller's list).
    padded = list(data) + [0] * (total_words - len(data))
    # Each LRAM block holds 64*kB *bytes*; `padded` is indexed in words, so
    # convert the block size to words before slicing. initval_parameters()
    # requires exactly this many entries per block.
    words_per_block = 64*kB * 8 // self.width
    for d in range(self.depth_cascading):
        for w in range(self.width_cascading):
            # NOTE(review): blocks are filled with consecutive runs of words.
            # For width == 64 the two width-cascaded blocks are wired to
            # separate 32-bit lanes of the bus, so the image may instead need
            # to be split per lane -- confirm on hardware.
            offset = (d * self.width_cascading + w) * words_per_block
            chunk = padded[offset:offset + words_per_block]
            self.lram_blocks[d][w].items += initval_parameters(chunk, self.width)