dvisampler: pack pixels in pixel clock domain to improve performance

This commit is contained in:
Sebastien Bourdeauducq 2013-11-16 13:53:26 +01:00
parent d225bdf362
commit 6f990a017e
4 changed files with 58 additions and 61 deletions

View File

@ -12,7 +12,7 @@ from misoclib.dvisampler.analysis import SyncPolarity, ResolutionDetection, Fram
from misoclib.dvisampler.dma import DMA
class DVISampler(Module, AutoCSR):
def __init__(self, pads, asmiport, n_dma_slots=2):
def __init__(self, pads, lasmim, n_dma_slots=2):
self.submodules.edid = EDID(pads)
self.submodules.clocking = Clocking(pads)
@ -62,7 +62,7 @@ class DVISampler(Module, AutoCSR):
self.resdetection.vsync.eq(self.syncpol.vsync)
]
self.submodules.frame = FrameExtraction()
self.submodules.frame = FrameExtraction(24*lasmim.dw//32)
self.comb += [
self.frame.valid_i.eq(self.syncpol.valid_o),
self.frame.de.eq(self.syncpol.de),
@ -72,7 +72,7 @@ class DVISampler(Module, AutoCSR):
self.frame.b.eq(self.syncpol.b)
]
self.submodules.dma = DMA(asmiport, n_dma_slots)
self.submodules.dma = DMA(lasmim, n_dma_slots)
self.comb += self.frame.frame.connect(self.dma.frame)
self.ev = self.dma.ev

View File

@ -5,7 +5,7 @@ from migen.genlib.record import Record
from migen.bank.description import *
from migen.flow.actor import *
from misoclib.dvisampler.common import channel_layout, frame_layout
from misoclib.dvisampler.common import channel_layout
class SyncPolarity(Module):
def __init__(self):
@ -106,7 +106,7 @@ class ResolutionDetection(Module, AutoCSR):
self.specials += MultiReg(vcounter_st, self._vres.status)
class FrameExtraction(Module, AutoCSR):
def __init__(self):
def __init__(self, word_width):
# in pix clock domain
self.valid_i = Signal()
self.vsync = Signal()
@ -116,39 +116,55 @@ class FrameExtraction(Module, AutoCSR):
self.b = Signal(8)
# in sys clock domain
self.frame = Source(frame_layout)
word_layout = [("parity", 1), ("pixels", word_width)]
self.frame = Source(word_layout)
self.busy = Signal()
self._r_overflow = CSR()
###
fifo_stb = Signal()
fifo_in = Record(frame_layout)
self.comb += [
fifo_stb.eq(self.valid_i & self.de),
fifo_in.r.eq(self.r),
fifo_in.g.eq(self.g),
fifo_in.b.eq(self.b),
]
# start of frame detection: new_frame is asserted on the rising edge of vsync
vsync_r = Signal()
new_frame = Signal()
self.comb += new_frame.eq(self.vsync & ~vsync_r)
self.sync.pix += vsync_r.eq(self.vsync)
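# pack pixels into words: the first pixel of each group fills the most
# significant 24-bit slot of cur_word, the last pixel fills bits 0-23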
cur_word = Signal(word_width)
cur_word_valid = Signal()
encoded_pixel = Signal(24)
self.comb += encoded_pixel.eq(Cat(self.b, self.g, self.r))
pack_factor = word_width//24
assert(pack_factor & (pack_factor - 1) == 0) # only support powers of 2
pack_counter = Signal(max=pack_factor)
self.sync.pix += [
If(self.vsync & ~vsync_r, fifo_in.parity.eq(~fifo_in.parity)),
vsync_r.eq(self.vsync)
cur_word_valid.eq(0),
If(new_frame,
pack_counter.eq(0)
).Elif(self.valid_i & self.de,
[If(pack_counter == (pack_factor-i-1),
cur_word[24*i:24*(i+1)].eq(encoded_pixel)) for i in range(pack_factor)],
Cat(pack_counter, cur_word_valid).eq(pack_counter + 1)
)
]
fifo = RenameClockDomains(AsyncFIFO(layout_len(frame_layout), 512),
# FIFO
fifo = RenameClockDomains(AsyncFIFO(word_layout, 512),
{"write": "pix", "read": "sys"})
self.submodules += fifo
self.comb += [
fifo.we.eq(fifo_stb),
fifo.din.eq(fifo_in.raw_bits()),
fifo.din.pixels.eq(cur_word),
fifo.we.eq(cur_word_valid)
]
self.sync.pix += If(new_frame, fifo.din.parity.eq(~fifo.din.parity))
self.comb += [
self.frame.stb.eq(fifo.readable),
self.frame.payload.raw_bits().eq(fifo.dout),
self.frame.payload.eq(fifo.dout),
fifo.re.eq(self.frame.ack),
self.busy.eq(0)
]
# overflow detection
pix_overflow = Signal()
pix_overflow_reset = Signal()

View File

@ -1,3 +1,2 @@
control_tokens = [0b1101010100, 0b0010101011, 0b0101010100, 0b1010101011]
channel_layout = [("d", 8), ("c", 2), ("de", 1)]
frame_layout = [("parity", 1), ("r", 8), ("g", 8), ("b", 8)]

View File

@ -5,8 +5,6 @@ from migen.bank.eventmanager import *
from migen.flow.actor import *
from migen.actorlib import dma_lasmi
from misoclib.dvisampler.common import frame_layout
# Slot status: EMPTY=0 LOADED=1 PENDING=2
class _Slot(Module, AutoCSR):
def __init__(self, addr_bits, alignment_bits):
@ -65,7 +63,8 @@ class DMA(Module):
bus_dw = lasmim.dw
alignment_bits = bits_for(bus_dw//8) - 1
self.frame = Sink(frame_layout)
fifo_word_width = 24*bus_dw//32
self.frame = Sink([("parity", 1), ("pixels", fifo_word_width)])
self._r_frame_size = CSRStorage(bus_aw + alignment_bits, alignment_bits=alignment_bits)
self.submodules._slot_array = _SlotArray(nslots, bus_aw, alignment_bits)
self.ev = self._slot_array.ev
@ -98,32 +97,23 @@ class DMA(Module):
)
]
# pack pixels into memory words
write_pixel = Signal()
last_pixel = Signal()
cur_memory_word = Signal(bus_dw)
encoded_pixel = Signal(32)
self.comb += [
encoded_pixel.eq(Cat(
self.frame.payload.b[6:], self.frame.payload.b,
self.frame.payload.g[6:], self.frame.payload.g,
self.frame.payload.r[6:], self.frame.payload.r))
]
pack_factor = bus_dw//32
assert(pack_factor & (pack_factor - 1) == 0) # only support powers of 2
pack_counter = Signal(max=pack_factor)
self.comb += last_pixel.eq(pack_counter == (pack_factor - 1))
self.sync += If(write_pixel,
[If(pack_counter == (pack_factor-i-1),
cur_memory_word[32*i:32*(i+1)].eq(encoded_pixel)) for i in range(pack_factor)],
pack_counter.eq(pack_counter + 1)
)
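# 24bpp -> 32bpp: each 8-bit channel is widened to 10 bits by placing a copy
# of its two MSBs below it, then each pixel is padded with two zero bits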
memory_word = Signal(bus_dw)
pixbits = []
for i in range(bus_dw//32):
for j in range(3):
b = (i*3+j)*8
pixbits.append(self.frame.payload.pixels[b+6:b+8])
pixbits.append(self.frame.payload.pixels[b:b+8])
pixbits.append(0)
pixbits.append(0)
self.comb += memory_word.eq(Cat(*pixbits))
# bus accessor
self.submodules._bus_accessor = dma_lasmi.Writer(lasmim)
self.comb += [
self._bus_accessor.address_data.payload.a.eq(current_address),
self._bus_accessor.address_data.payload.d.eq(cur_memory_word)
self._bus_accessor.address_data.payload.d.eq(memory_word)
]
# control FSM
@ -133,23 +123,15 @@ class DMA(Module):
fsm.act("WAIT_SOF",
reset_words.eq(1),
self.frame.ack.eq(~self._slot_array.address_valid | ~sof),
If(self._slot_array.address_valid & sof & self.frame.stb, NextState("TRANSFER_PIXEL"))
If(self._slot_array.address_valid & sof & self.frame.stb, NextState("TRANSFER_PIXELS"))
)
fsm.act("TRANSFER_PIXEL",
self.frame.ack.eq(1),
fsm.act("TRANSFER_PIXELS",
self.frame.ack.eq(self._bus_accessor.address_data.ack),
If(self.frame.stb,
write_pixel.eq(1),
If(last_pixel, NextState("TO_MEMORY"))
)
)
fsm.act("TO_MEMORY",
self._bus_accessor.address_data.stb.eq(1),
If(self._bus_accessor.address_data.ack,
count_word.eq(1),
If(last_word,
NextState("EOF")
).Else(
NextState("TRANSFER_PIXEL")
self._bus_accessor.address_data.stb.eq(1),
If(self._bus_accessor.address_data.ack,
count_word.eq(1),
If(last_word, NextState("EOF"))
)
)
)