framebuffer: unpack memory words in pixel clock domain for better perf

This commit is contained in:
Sebastien Bourdeauducq 2013-11-17 23:41:18 +01:00
parent 8f164d0d7b
commit 4cfcda6c8c
3 changed files with 63 additions and 71 deletions

View file

@ -9,23 +9,20 @@ from misoclib.framebuffer.phy import Driver
class Framebuffer(Module, AutoCSR): class Framebuffer(Module, AutoCSR):
def __init__(self, pads_vga, pads_dvi, lasmim, simulation=False): def __init__(self, pads_vga, pads_dvi, lasmim, simulation=False):
pack_factor = lasmim.dw//(2*bpp) pack_factor = lasmim.dw//bpp
packed_pixels = structuring.pack_layout(pixel_layout, pack_factor)
self._enable = CSRStorage() self._enable = CSRStorage()
self.fi = FrameInitiator() self.fi = FrameInitiator(pack_factor)
self.dma = spi.DMAReadController(dma_lasmi.Reader(lasmim), spi.MODE_EXTERNAL, length_reset=640*480*4) self.dma = spi.DMAReadController(dma_lasmi.Reader(lasmim), spi.MODE_EXTERNAL, length_reset=640*480*4)
self.driver = Driver(pads_vga, pads_dvi) self.driver = Driver(pack_factor, pads_vga, pads_dvi)
cast = structuring.Cast(lasmim.dw, packed_pixels, reverse_to=True) cast = structuring.Cast(lasmim.dw, pixel_layout(pack_factor), reverse_to=True)
unpack = structuring.Unpack(pack_factor, pixel_layout) vtg = VTG(pack_factor)
vtg = VTG()
g = DataFlowGraph() g = DataFlowGraph()
g.add_connection(self.fi, vtg, sink_ep="timing") g.add_connection(self.fi, vtg, sink_ep="timing")
g.add_connection(self.dma, cast) g.add_connection(self.dma, cast)
g.add_connection(cast, unpack) g.add_connection(cast, vtg, sink_ep="pixels")
g.add_connection(unpack, vtg, sink_ep="pixels")
g.add_connection(vtg, self.driver) g.add_connection(vtg, self.driver)
self.submodules += CompositeActor(g) self.submodules += CompositeActor(g)
@ -36,10 +33,11 @@ class Framebuffer(Module, AutoCSR):
] ]
class Blender(PipelinedActor, AutoCSR): class Blender(PipelinedActor, AutoCSR):
def __init__(self, nimages, latency): def __init__(self, nimages, pack_factor, latency):
sink_layout = [("i"+str(i), pixel_layout) for i in range(nimages)] epixel_layout = pixel_layout(pack_factor)
sink_layout = [("i"+str(i), epixel_layout) for i in range(nimages)]
self.sink = Sink(sink_layout) self.sink = Sink(sink_layout)
self.source = Source(pixel_layout) self.source = Source(epixel_layout)
factors = [] factors = []
for i in range(nimages): for i in range(nimages):
name = "f"+str(i) name = "f"+str(i)
@ -54,8 +52,8 @@ class Blender(PipelinedActor, AutoCSR):
self.sync += If(self.pipe_ce, sink_registered.eq(self.sink.payload)) self.sync += If(self.pipe_ce, sink_registered.eq(self.sink.payload))
imgs = [getattr(sink_registered, "i"+str(i)) for i in range(nimages)] imgs = [getattr(sink_registered, "i"+str(i)) for i in range(nimages)]
outval = Record(pixel_layout) outval = Record(epixel_layout)
for e in pixel_layout: for e in epixel_layout:
name = e[0] name = e[0]
inpixs = [getattr(img, name) for img in imgs] inpixs = [getattr(img, name) for img in imgs]
outpix = getattr(outval, name) outpix = getattr(outval, name)
@ -74,7 +72,7 @@ class Blender(PipelinedActor, AutoCSR):
pipe_stmts = [] pipe_stmts = []
for i in range(latency-1): for i in range(latency-1):
new_outval = Record(pixel_layout) new_outval = Record(epixel_layout)
pipe_stmts.append(new_outval.eq(outval)) pipe_stmts.append(new_outval.eq(outval))
outval = new_outval outval = new_outval
self.sync += If(self.pipe_ce, pipe_stmts) self.sync += If(self.pipe_ce, pipe_stmts)
@ -82,29 +80,25 @@ class Blender(PipelinedActor, AutoCSR):
class MixFramebuffer(Module, AutoCSR): class MixFramebuffer(Module, AutoCSR):
def __init__(self, pads_vga, pads_dvi, *lasmims, blender_latency=5): def __init__(self, pads_vga, pads_dvi, *lasmims, blender_latency=5):
pack_factor = lasmims[0].dw//(2*bpp) pack_factor = lasmims[0].dw//bpp
packed_pixels = structuring.pack_layout(pixel_layout, pack_factor)
self._enable = CSRStorage() self._enable = CSRStorage()
self.fi = FrameInitiator() self.fi = FrameInitiator(pack_factor)
self.blender = Blender(len(lasmims), blender_latency) self.blender = Blender(len(lasmims), pack_factor, blender_latency)
self.driver = Driver(pads_vga, pads_dvi) self.driver = Driver(pack_factor, pads_vga, pads_dvi)
self.comb += self.fi.trigger.eq(self._enable.storage) self.comb += self.fi.trigger.eq(self._enable.storage)
g = DataFlowGraph() g = DataFlowGraph()
epixel_layout = pixel_layout(pack_factor)
for n, lasmim in enumerate(lasmims): for n, lasmim in enumerate(lasmims):
dma = spi.DMAReadController(dma_lasmi.Reader(lasmim), spi.MODE_EXTERNAL, length_reset=640*480*4) dma = spi.DMAReadController(dma_lasmi.Reader(lasmim), spi.MODE_EXTERNAL, length_reset=640*480*4)
cast = structuring.Cast(lasmim.dw, packed_pixels, reverse_to=True) cast = structuring.Cast(lasmim.dw, epixel_layout, reverse_to=True)
unpack = structuring.Unpack(pack_factor, pixel_layout)
g.add_connection(dma, cast) g.add_connection(dma, cast)
g.add_connection(cast, unpack) g.add_connection(cast, self.blender, sink_subr=["i"+str(n)])
g.add_connection(unpack, self.blender, sink_subr=["i"+str(n)])
self.comb += dma.generator.trigger.eq(self._enable.storage) self.comb += dma.generator.trigger.eq(self._enable.storage)
setattr(self, "dma"+str(n), dma) setattr(self, "dma"+str(n), dma)
vtg = VTG() vtg = VTG(pack_factor)
self.comb += vtg.enable.eq(self._enable.storage) self.comb += vtg.enable.eq(self._enable.storage)
g.add_connection(self.fi, vtg, sink_ep="timing") g.add_connection(self.fi, vtg, sink_ep="timing")
g.add_connection(self.blender, vtg, sink_ep="pixels") g.add_connection(self.blender, vtg, sink_ep="pixels")

View file

@ -3,7 +3,7 @@ from migen.flow.actor import *
from migen.bank.description import CSRStorage from migen.bank.description import CSRStorage
from migen.actorlib import spi from migen.actorlib import spi
_hbits = 11 _hbits = 12
_vbits = 12 _vbits = 12
bpp = 32 bpp = 32
@ -14,10 +14,8 @@ pixel_layout_s = [
("g", bpc), ("g", bpc),
("b", bpc) ("b", bpc)
] ]
pixel_layout = [ def pixel_layout(pack_factor):
("p0", pixel_layout_s), return [("p"+str(i), pixel_layout_s) for i in range(pack_factor)]
("p1", pixel_layout_s)
]
bpc_phy = 8 bpc_phy = 8
phy_layout_s = [ phy_layout_s = [
@ -25,21 +23,21 @@ phy_layout_s = [
("g", bpc_phy), ("g", bpc_phy),
("b", bpc_phy) ("b", bpc_phy)
] ]
phy_layout = [ def phy_layout(pack_factor):
("hsync", 1), r = [("hsync", 1), ("vsync", 1), ("de", 1)]
("vsync", 1), for i in range(pack_factor):
("de", 1), r.append(("p"+str(i), phy_layout_s))
("p0", phy_layout_s), return r
("p1", phy_layout_s)
]
class FrameInitiator(spi.SingleGenerator): class FrameInitiator(spi.SingleGenerator):
def __init__(self): def __init__(self, pack_factor):
h_alignment_bits = log2_int(pack_factor)
hbits_dyn = _hbits - h_alignment_bits
layout = [ layout = [
("hres", _hbits, 640, 1), ("hres", hbits_dyn, 640, h_alignment_bits),
("hsync_start", _hbits, 656, 1), ("hsync_start", hbits_dyn, 656, h_alignment_bits),
("hsync_end", _hbits, 752, 1), ("hsync_end", hbits_dyn, 752, h_alignment_bits),
("hscan", _hbits, 800, 1), ("hscan", hbits_dyn, 800, h_alignment_bits),
("vres", _vbits, 480), ("vres", _vbits, 480),
("vsync_start", _vbits, 492), ("vsync_start", _vbits, 492),
@ -49,19 +47,20 @@ class FrameInitiator(spi.SingleGenerator):
spi.SingleGenerator.__init__(self, layout, spi.MODE_EXTERNAL) spi.SingleGenerator.__init__(self, layout, spi.MODE_EXTERNAL)
class VTG(Module): class VTG(Module):
def __init__(self): def __init__(self, pack_factor):
hbits_dyn = _hbits - log2_int(pack_factor)
self.enable = Signal() self.enable = Signal()
self.timing = Sink([ self.timing = Sink([
("hres", _hbits), ("hres", hbits_dyn),
("hsync_start", _hbits), ("hsync_start", hbits_dyn),
("hsync_end", _hbits), ("hsync_end", hbits_dyn),
("hscan", _hbits), ("hscan", hbits_dyn),
("vres", _vbits), ("vres", _vbits),
("vsync_start", _vbits), ("vsync_start", _vbits),
("vsync_end", _vbits), ("vsync_end", _vbits),
("vscan", _vbits)]) ("vscan", _vbits)])
self.pixels = Sink(pixel_layout) self.pixels = Sink(pixel_layout(pack_factor))
self.phy = Source(phy_layout) self.phy = Source(phy_layout(pack_factor))
self.busy = Signal() self.busy = Signal()
### ###
@ -71,7 +70,7 @@ class VTG(Module):
active = Signal() active = Signal()
generate_en = Signal() generate_en = Signal()
hcounter = Signal(_hbits) hcounter = Signal(hbits_dyn)
vcounter = Signal(_vbits) vcounter = Signal(_vbits)
skip = bpc - bpc_phy skip = bpc - bpc_phy
@ -79,7 +78,7 @@ class VTG(Module):
active.eq(hactive & vactive), active.eq(hactive & vactive),
If(active, If(active,
[getattr(getattr(self.phy.payload, p), c).eq(getattr(getattr(self.pixels.payload, p), c)[skip:]) [getattr(getattr(self.phy.payload, p), c).eq(getattr(getattr(self.pixels.payload, p), c)[skip:])
for p in ["p0", "p1"] for c in ["r", "g", "b"]], for p in ["p"+str(i) for i in range(pack_factor)] for c in ["r", "g", "b"]],
self.phy.payload.de.eq(1) self.phy.payload.de.eq(1)
), ),

View file

@ -8,8 +8,8 @@ from misoclib.framebuffer.format import bpc_phy, phy_layout
from misoclib.framebuffer import dvi from misoclib.framebuffer import dvi
class _FIFO(Module): class _FIFO(Module):
def __init__(self): def __init__(self, pack_factor):
self.phy = Sink(phy_layout) self.phy = Sink(phy_layout(pack_factor))
self.busy = Signal() self.busy = Signal()
self.pix_hsync = Signal() self.pix_hsync = Signal()
@ -21,7 +21,7 @@ class _FIFO(Module):
### ###
fifo = RenameClockDomains(AsyncFIFO(phy_layout, 512), fifo = RenameClockDomains(AsyncFIFO(phy_layout(pack_factor), 512),
{"write": "sys", "read": "pix"}) {"write": "sys", "read": "pix"})
self.submodules += fifo self.submodules += fifo
self.comb += [ self.comb += [
@ -31,23 +31,22 @@ class _FIFO(Module):
self.busy.eq(0) self.busy.eq(0)
] ]
pix_parity = Signal() unpack_counter = Signal(max=pack_factor)
assert(pack_factor & (pack_factor - 1) == 0) # only support powers of 2
self.sync.pix += [ self.sync.pix += [
pix_parity.eq(~pix_parity), unpack_counter.eq(unpack_counter + 1),
self.pix_hsync.eq(fifo.dout.hsync), self.pix_hsync.eq(fifo.dout.hsync),
self.pix_vsync.eq(fifo.dout.vsync), self.pix_vsync.eq(fifo.dout.vsync),
self.pix_de.eq(fifo.dout.de), self.pix_de.eq(fifo.dout.de)
If(pix_parity,
self.pix_r.eq(fifo.dout.p1.r),
self.pix_g.eq(fifo.dout.p1.g),
self.pix_b.eq(fifo.dout.p1.b)
).Else(
self.pix_r.eq(fifo.dout.p0.r),
self.pix_g.eq(fifo.dout.p0.g),
self.pix_b.eq(fifo.dout.p0.b)
)
] ]
self.comb += fifo.re.eq(pix_parity) for i in range(pack_factor):
pixel = getattr(fifo.dout, "p"+str(i))
self.sync.pix += If(unpack_counter == i,
self.pix_r.eq(pixel.r),
self.pix_g.eq(pixel.g),
self.pix_b.eq(pixel.b)
)
self.comb += fifo.re.eq(unpack_counter == (pack_factor - 1))
# This assumes a 50MHz base clock # This assumes a 50MHz base clock
class _Clocking(Module, AutoCSR): class _Clocking(Module, AutoCSR):
@ -168,8 +167,8 @@ class _Clocking(Module, AutoCSR):
o_O=pads_dvi.clk_p, o_OB=pads_dvi.clk_n) o_O=pads_dvi.clk_p, o_OB=pads_dvi.clk_n)
class Driver(Module, AutoCSR): class Driver(Module, AutoCSR):
def __init__(self, pads_vga, pads_dvi): def __init__(self, pack_factor, pads_vga, pads_dvi):
fifo = _FIFO() fifo = _FIFO(pack_factor)
self.submodules += fifo self.submodules += fifo
self.phy = fifo.phy self.phy = fifo.phy
self.busy = fifo.busy self.busy = fifo.busy