framebuffer: unpack memory words in pixel clock domain for better perf

This commit is contained in:
Sebastien Bourdeauducq 2013-11-17 23:41:18 +01:00
parent 8f164d0d7b
commit 4cfcda6c8c
3 changed files with 63 additions and 71 deletions

View file

@ -9,23 +9,20 @@ from misoclib.framebuffer.phy import Driver
class Framebuffer(Module, AutoCSR):
def __init__(self, pads_vga, pads_dvi, lasmim, simulation=False):
pack_factor = lasmim.dw//(2*bpp)
packed_pixels = structuring.pack_layout(pixel_layout, pack_factor)
pack_factor = lasmim.dw//bpp
self._enable = CSRStorage()
self.fi = FrameInitiator()
self.fi = FrameInitiator(pack_factor)
self.dma = spi.DMAReadController(dma_lasmi.Reader(lasmim), spi.MODE_EXTERNAL, length_reset=640*480*4)
self.driver = Driver(pads_vga, pads_dvi)
self.driver = Driver(pack_factor, pads_vga, pads_dvi)
cast = structuring.Cast(lasmim.dw, packed_pixels, reverse_to=True)
unpack = structuring.Unpack(pack_factor, pixel_layout)
vtg = VTG()
cast = structuring.Cast(lasmim.dw, pixel_layout(pack_factor), reverse_to=True)
vtg = VTG(pack_factor)
g = DataFlowGraph()
g.add_connection(self.fi, vtg, sink_ep="timing")
g.add_connection(self.dma, cast)
g.add_connection(cast, unpack)
g.add_connection(unpack, vtg, sink_ep="pixels")
g.add_connection(cast, vtg, sink_ep="pixels")
g.add_connection(vtg, self.driver)
self.submodules += CompositeActor(g)
@ -36,10 +33,11 @@ class Framebuffer(Module, AutoCSR):
]
class Blender(PipelinedActor, AutoCSR):
def __init__(self, nimages, latency):
sink_layout = [("i"+str(i), pixel_layout) for i in range(nimages)]
def __init__(self, nimages, pack_factor, latency):
epixel_layout = pixel_layout(pack_factor)
sink_layout = [("i"+str(i), epixel_layout) for i in range(nimages)]
self.sink = Sink(sink_layout)
self.source = Source(pixel_layout)
self.source = Source(epixel_layout)
factors = []
for i in range(nimages):
name = "f"+str(i)
@ -54,8 +52,8 @@ class Blender(PipelinedActor, AutoCSR):
self.sync += If(self.pipe_ce, sink_registered.eq(self.sink.payload))
imgs = [getattr(sink_registered, "i"+str(i)) for i in range(nimages)]
outval = Record(pixel_layout)
for e in pixel_layout:
outval = Record(epixel_layout)
for e in epixel_layout:
name = e[0]
inpixs = [getattr(img, name) for img in imgs]
outpix = getattr(outval, name)
@ -74,7 +72,7 @@ class Blender(PipelinedActor, AutoCSR):
pipe_stmts = []
for i in range(latency-1):
new_outval = Record(pixel_layout)
new_outval = Record(epixel_layout)
pipe_stmts.append(new_outval.eq(outval))
outval = new_outval
self.sync += If(self.pipe_ce, pipe_stmts)
@ -82,29 +80,25 @@ class Blender(PipelinedActor, AutoCSR):
class MixFramebuffer(Module, AutoCSR):
def __init__(self, pads_vga, pads_dvi, *lasmims, blender_latency=5):
pack_factor = lasmims[0].dw//(2*bpp)
packed_pixels = structuring.pack_layout(pixel_layout, pack_factor)
pack_factor = lasmims[0].dw//bpp
self._enable = CSRStorage()
self.fi = FrameInitiator()
self.blender = Blender(len(lasmims), blender_latency)
self.driver = Driver(pads_vga, pads_dvi)
self.fi = FrameInitiator(pack_factor)
self.blender = Blender(len(lasmims), pack_factor, blender_latency)
self.driver = Driver(pack_factor, pads_vga, pads_dvi)
self.comb += self.fi.trigger.eq(self._enable.storage)
g = DataFlowGraph()
epixel_layout = pixel_layout(pack_factor)
for n, lasmim in enumerate(lasmims):
dma = spi.DMAReadController(dma_lasmi.Reader(lasmim), spi.MODE_EXTERNAL, length_reset=640*480*4)
cast = structuring.Cast(lasmim.dw, packed_pixels, reverse_to=True)
unpack = structuring.Unpack(pack_factor, pixel_layout)
cast = structuring.Cast(lasmim.dw, epixel_layout, reverse_to=True)
g.add_connection(dma, cast)
g.add_connection(cast, unpack)
g.add_connection(unpack, self.blender, sink_subr=["i"+str(n)])
g.add_connection(cast, self.blender, sink_subr=["i"+str(n)])
self.comb += dma.generator.trigger.eq(self._enable.storage)
setattr(self, "dma"+str(n), dma)
vtg = VTG()
vtg = VTG(pack_factor)
self.comb += vtg.enable.eq(self._enable.storage)
g.add_connection(self.fi, vtg, sink_ep="timing")
g.add_connection(self.blender, vtg, sink_ep="pixels")

View file

@ -3,7 +3,7 @@ from migen.flow.actor import *
from migen.bank.description import CSRStorage
from migen.actorlib import spi
_hbits = 11
_hbits = 12
_vbits = 12
bpp = 32
@ -14,10 +14,8 @@ pixel_layout_s = [
("g", bpc),
("b", bpc)
]
pixel_layout = [
("p0", pixel_layout_s),
("p1", pixel_layout_s)
]
def pixel_layout(pack_factor):
    """Return the layout of a group of ``pack_factor`` pixels.

    The result is a list of ``(name, sublayout)`` pairs named ``p0``,
    ``p1``, ... in order, each using ``pixel_layout_s`` as its sub-layout.
    """
    names = map("p{}".format, range(pack_factor))
    return [(name, pixel_layout_s) for name in names]
bpc_phy = 8
phy_layout_s = [
@ -25,21 +23,21 @@ phy_layout_s = [
("g", bpc_phy),
("b", bpc_phy)
]
phy_layout = [
("hsync", 1),
("vsync", 1),
("de", 1),
("p0", phy_layout_s),
("p1", phy_layout_s)
]
def phy_layout(pack_factor):
    """Return the PHY-side layout for ``pack_factor`` pixels.

    The single-bit ``hsync``, ``vsync`` and ``de`` fields come first,
    followed by one entry per pixel (``p0``, ``p1``, ...), each using
    ``phy_layout_s`` as its sub-layout. A new list is built on every call.
    """
    control = [("hsync", 1), ("vsync", 1), ("de", 1)]
    pixels = [("p{}".format(n), phy_layout_s) for n in range(pack_factor)]
    return control + pixels
class FrameInitiator(spi.SingleGenerator):
def __init__(self):
def __init__(self, pack_factor):
h_alignment_bits = log2_int(pack_factor)
hbits_dyn = _hbits - h_alignment_bits
layout = [
("hres", _hbits, 640, 1),
("hsync_start", _hbits, 656, 1),
("hsync_end", _hbits, 752, 1),
("hscan", _hbits, 800, 1),
("hres", hbits_dyn, 640, h_alignment_bits),
("hsync_start", hbits_dyn, 656, h_alignment_bits),
("hsync_end", hbits_dyn, 752, h_alignment_bits),
("hscan", hbits_dyn, 800, h_alignment_bits),
("vres", _vbits, 480),
("vsync_start", _vbits, 492),
@ -49,19 +47,20 @@ class FrameInitiator(spi.SingleGenerator):
spi.SingleGenerator.__init__(self, layout, spi.MODE_EXTERNAL)
class VTG(Module):
def __init__(self):
def __init__(self, pack_factor):
hbits_dyn = _hbits - log2_int(pack_factor)
self.enable = Signal()
self.timing = Sink([
("hres", _hbits),
("hsync_start", _hbits),
("hsync_end", _hbits),
("hscan", _hbits),
("hres", hbits_dyn),
("hsync_start", hbits_dyn),
("hsync_end", hbits_dyn),
("hscan", hbits_dyn),
("vres", _vbits),
("vsync_start", _vbits),
("vsync_end", _vbits),
("vscan", _vbits)])
self.pixels = Sink(pixel_layout)
self.phy = Source(phy_layout)
self.pixels = Sink(pixel_layout(pack_factor))
self.phy = Source(phy_layout(pack_factor))
self.busy = Signal()
###
@ -71,7 +70,7 @@ class VTG(Module):
active = Signal()
generate_en = Signal()
hcounter = Signal(_hbits)
hcounter = Signal(hbits_dyn)
vcounter = Signal(_vbits)
skip = bpc - bpc_phy
@ -79,7 +78,7 @@ class VTG(Module):
active.eq(hactive & vactive),
If(active,
[getattr(getattr(self.phy.payload, p), c).eq(getattr(getattr(self.pixels.payload, p), c)[skip:])
for p in ["p0", "p1"] for c in ["r", "g", "b"]],
for p in ["p"+str(i) for i in range(pack_factor)] for c in ["r", "g", "b"]],
self.phy.payload.de.eq(1)
),

View file

@ -8,8 +8,8 @@ from misoclib.framebuffer.format import bpc_phy, phy_layout
from misoclib.framebuffer import dvi
class _FIFO(Module):
def __init__(self):
self.phy = Sink(phy_layout)
def __init__(self, pack_factor):
self.phy = Sink(phy_layout(pack_factor))
self.busy = Signal()
self.pix_hsync = Signal()
@ -21,7 +21,7 @@ class _FIFO(Module):
###
fifo = RenameClockDomains(AsyncFIFO(phy_layout, 512),
fifo = RenameClockDomains(AsyncFIFO(phy_layout(pack_factor), 512),
{"write": "sys", "read": "pix"})
self.submodules += fifo
self.comb += [
@ -31,23 +31,22 @@ class _FIFO(Module):
self.busy.eq(0)
]
pix_parity = Signal()
unpack_counter = Signal(max=pack_factor)
assert(pack_factor & (pack_factor - 1) == 0) # only support powers of 2
self.sync.pix += [
pix_parity.eq(~pix_parity),
unpack_counter.eq(unpack_counter + 1),
self.pix_hsync.eq(fifo.dout.hsync),
self.pix_vsync.eq(fifo.dout.vsync),
self.pix_de.eq(fifo.dout.de),
If(pix_parity,
self.pix_r.eq(fifo.dout.p1.r),
self.pix_g.eq(fifo.dout.p1.g),
self.pix_b.eq(fifo.dout.p1.b)
).Else(
self.pix_r.eq(fifo.dout.p0.r),
self.pix_g.eq(fifo.dout.p0.g),
self.pix_b.eq(fifo.dout.p0.b)
)
self.pix_de.eq(fifo.dout.de)
]
self.comb += fifo.re.eq(pix_parity)
for i in range(pack_factor):
pixel = getattr(fifo.dout, "p"+str(i))
self.sync.pix += If(unpack_counter == i,
self.pix_r.eq(pixel.r),
self.pix_g.eq(pixel.g),
self.pix_b.eq(pixel.b)
)
self.comb += fifo.re.eq(unpack_counter == (pack_factor - 1))
# This assumes a 50MHz base clock
class _Clocking(Module, AutoCSR):
@ -168,8 +167,8 @@ class _Clocking(Module, AutoCSR):
o_O=pads_dvi.clk_p, o_OB=pads_dvi.clk_n)
class Driver(Module, AutoCSR):
def __init__(self, pads_vga, pads_dvi):
fifo = _FIFO()
def __init__(self, pack_factor, pads_vga, pads_dvi):
fifo = _FIFO(pack_factor)
self.submodules += fifo
self.phy = fifo.phy
self.busy = fifo.busy