dvisampler: pack pixels in pixel clock domain to improve performance
This commit is contained in:
parent
d225bdf362
commit
6f990a017e
|
@ -12,7 +12,7 @@ from misoclib.dvisampler.analysis import SyncPolarity, ResolutionDetection, Fram
|
|||
from misoclib.dvisampler.dma import DMA
|
||||
|
||||
class DVISampler(Module, AutoCSR):
|
||||
def __init__(self, pads, asmiport, n_dma_slots=2):
|
||||
def __init__(self, pads, lasmim, n_dma_slots=2):
|
||||
self.submodules.edid = EDID(pads)
|
||||
self.submodules.clocking = Clocking(pads)
|
||||
|
||||
|
@ -62,7 +62,7 @@ class DVISampler(Module, AutoCSR):
|
|||
self.resdetection.vsync.eq(self.syncpol.vsync)
|
||||
]
|
||||
|
||||
self.submodules.frame = FrameExtraction()
|
||||
self.submodules.frame = FrameExtraction(24*lasmim.dw//32)
|
||||
self.comb += [
|
||||
self.frame.valid_i.eq(self.syncpol.valid_o),
|
||||
self.frame.de.eq(self.syncpol.de),
|
||||
|
@ -72,7 +72,7 @@ class DVISampler(Module, AutoCSR):
|
|||
self.frame.b.eq(self.syncpol.b)
|
||||
]
|
||||
|
||||
self.submodules.dma = DMA(asmiport, n_dma_slots)
|
||||
self.submodules.dma = DMA(lasmim, n_dma_slots)
|
||||
self.comb += self.frame.frame.connect(self.dma.frame)
|
||||
self.ev = self.dma.ev
|
||||
|
||||
|
|
|
@ -5,7 +5,7 @@ from migen.genlib.record import Record
|
|||
from migen.bank.description import *
|
||||
from migen.flow.actor import *
|
||||
|
||||
from misoclib.dvisampler.common import channel_layout, frame_layout
|
||||
from misoclib.dvisampler.common import channel_layout
|
||||
|
||||
class SyncPolarity(Module):
|
||||
def __init__(self):
|
||||
|
@ -106,7 +106,7 @@ class ResolutionDetection(Module, AutoCSR):
|
|||
self.specials += MultiReg(vcounter_st, self._vres.status)
|
||||
|
||||
class FrameExtraction(Module, AutoCSR):
|
||||
def __init__(self):
|
||||
def __init__(self, word_width):
|
||||
# in pix clock domain
|
||||
self.valid_i = Signal()
|
||||
self.vsync = Signal()
|
||||
|
@ -116,35 +116,51 @@ class FrameExtraction(Module, AutoCSR):
|
|||
self.b = Signal(8)
|
||||
|
||||
# in sys clock domain
|
||||
self.frame = Source(frame_layout)
|
||||
word_layout = [("parity", 1), ("pixels", word_width)]
|
||||
self.frame = Source(word_layout)
|
||||
self.busy = Signal()
|
||||
|
||||
self._r_overflow = CSR()
|
||||
|
||||
###
|
||||
|
||||
fifo_stb = Signal()
|
||||
fifo_in = Record(frame_layout)
|
||||
self.comb += [
|
||||
fifo_stb.eq(self.valid_i & self.de),
|
||||
fifo_in.r.eq(self.r),
|
||||
fifo_in.g.eq(self.g),
|
||||
fifo_in.b.eq(self.b),
|
||||
]
|
||||
# start of frame detection
|
||||
vsync_r = Signal()
|
||||
new_frame = Signal()
|
||||
self.comb += new_frame.eq(self.vsync & ~vsync_r)
|
||||
self.sync.pix += vsync_r.eq(self.vsync)
|
||||
|
||||
# pack pixels into words
|
||||
cur_word = Signal(word_width)
|
||||
cur_word_valid = Signal()
|
||||
encoded_pixel = Signal(24)
|
||||
self.comb += encoded_pixel.eq(Cat(self.b, self.g, self.r))
|
||||
pack_factor = word_width//24
|
||||
assert(pack_factor & (pack_factor - 1) == 0) # only support powers of 2
|
||||
pack_counter = Signal(max=pack_factor)
|
||||
self.sync.pix += [
|
||||
If(self.vsync & ~vsync_r, fifo_in.parity.eq(~fifo_in.parity)),
|
||||
vsync_r.eq(self.vsync)
|
||||
cur_word_valid.eq(0),
|
||||
If(new_frame,
|
||||
pack_counter.eq(0)
|
||||
).Elif(self.valid_i & self.de,
|
||||
[If(pack_counter == (pack_factor-i-1),
|
||||
cur_word[24*i:24*(i+1)].eq(encoded_pixel)) for i in range(pack_factor)],
|
||||
Cat(pack_counter, cur_word_valid).eq(pack_counter + 1)
|
||||
)
|
||||
]
|
||||
|
||||
fifo = RenameClockDomains(AsyncFIFO(layout_len(frame_layout), 512),
|
||||
# FIFO
|
||||
fifo = RenameClockDomains(AsyncFIFO(word_layout, 512),
|
||||
{"write": "pix", "read": "sys"})
|
||||
self.submodules += fifo
|
||||
self.comb += [
|
||||
fifo.we.eq(fifo_stb),
|
||||
fifo.din.eq(fifo_in.raw_bits()),
|
||||
fifo.din.pixels.eq(cur_word),
|
||||
fifo.we.eq(cur_word_valid)
|
||||
]
|
||||
self.sync.pix += If(new_frame, fifo.din.parity.eq(~fifo.din.parity))
|
||||
self.comb += [
|
||||
self.frame.stb.eq(fifo.readable),
|
||||
self.frame.payload.raw_bits().eq(fifo.dout),
|
||||
self.frame.payload.eq(fifo.dout),
|
||||
fifo.re.eq(self.frame.ack),
|
||||
self.busy.eq(0)
|
||||
]
|
||||
|
|
|
@ -1,3 +1,2 @@
|
|||
control_tokens = [0b1101010100, 0b0010101011, 0b0101010100, 0b1010101011]
|
||||
channel_layout = [("d", 8), ("c", 2), ("de", 1)]
|
||||
frame_layout = [("parity", 1), ("r", 8), ("g", 8), ("b", 8)]
|
||||
|
|
|
@ -5,8 +5,6 @@ from migen.bank.eventmanager import *
|
|||
from migen.flow.actor import *
|
||||
from migen.actorlib import dma_lasmi
|
||||
|
||||
from misoclib.dvisampler.common import frame_layout
|
||||
|
||||
# Slot status: EMPTY=0 LOADED=1 PENDING=2
|
||||
class _Slot(Module, AutoCSR):
|
||||
def __init__(self, addr_bits, alignment_bits):
|
||||
|
@ -65,7 +63,8 @@ class DMA(Module):
|
|||
bus_dw = lasmim.dw
|
||||
alignment_bits = bits_for(bus_dw//8) - 1
|
||||
|
||||
self.frame = Sink(frame_layout)
|
||||
fifo_word_width = 24*bus_dw//32
|
||||
self.frame = Sink([("parity", 1), ("pixels", fifo_word_width)])
|
||||
self._r_frame_size = CSRStorage(bus_aw + alignment_bits, alignment_bits=alignment_bits)
|
||||
self.submodules._slot_array = _SlotArray(nslots, bus_aw, alignment_bits)
|
||||
self.ev = self._slot_array.ev
|
||||
|
@ -98,32 +97,23 @@ class DMA(Module):
|
|||
)
|
||||
]
|
||||
|
||||
# pack pixels into memory words
|
||||
write_pixel = Signal()
|
||||
last_pixel = Signal()
|
||||
cur_memory_word = Signal(bus_dw)
|
||||
encoded_pixel = Signal(32)
|
||||
self.comb += [
|
||||
encoded_pixel.eq(Cat(
|
||||
self.frame.payload.b[6:], self.frame.payload.b,
|
||||
self.frame.payload.g[6:], self.frame.payload.g,
|
||||
self.frame.payload.r[6:], self.frame.payload.r))
|
||||
]
|
||||
pack_factor = bus_dw//32
|
||||
assert(pack_factor & (pack_factor - 1) == 0) # only support powers of 2
|
||||
pack_counter = Signal(max=pack_factor)
|
||||
self.comb += last_pixel.eq(pack_counter == (pack_factor - 1))
|
||||
self.sync += If(write_pixel,
|
||||
[If(pack_counter == (pack_factor-i-1),
|
||||
cur_memory_word[32*i:32*(i+1)].eq(encoded_pixel)) for i in range(pack_factor)],
|
||||
pack_counter.eq(pack_counter + 1)
|
||||
)
|
||||
# 24bpp -> 32bpp
|
||||
memory_word = Signal(bus_dw)
|
||||
pixbits = []
|
||||
for i in range(bus_dw//32):
|
||||
for j in range(3):
|
||||
b = (i*3+j)*8
|
||||
pixbits.append(self.frame.payload.pixels[b+6:b+8])
|
||||
pixbits.append(self.frame.payload.pixels[b:b+8])
|
||||
pixbits.append(0)
|
||||
pixbits.append(0)
|
||||
self.comb += memory_word.eq(Cat(*pixbits))
|
||||
|
||||
# bus accessor
|
||||
self.submodules._bus_accessor = dma_lasmi.Writer(lasmim)
|
||||
self.comb += [
|
||||
self._bus_accessor.address_data.payload.a.eq(current_address),
|
||||
self._bus_accessor.address_data.payload.d.eq(cur_memory_word)
|
||||
self._bus_accessor.address_data.payload.d.eq(memory_word)
|
||||
]
|
||||
|
||||
# control FSM
|
||||
|
@ -133,23 +123,15 @@ class DMA(Module):
|
|||
fsm.act("WAIT_SOF",
|
||||
reset_words.eq(1),
|
||||
self.frame.ack.eq(~self._slot_array.address_valid | ~sof),
|
||||
If(self._slot_array.address_valid & sof & self.frame.stb, NextState("TRANSFER_PIXEL"))
|
||||
If(self._slot_array.address_valid & sof & self.frame.stb, NextState("TRANSFER_PIXELS"))
|
||||
)
|
||||
fsm.act("TRANSFER_PIXEL",
|
||||
self.frame.ack.eq(1),
|
||||
fsm.act("TRANSFER_PIXELS",
|
||||
self.frame.ack.eq(self._bus_accessor.address_data.ack),
|
||||
If(self.frame.stb,
|
||||
write_pixel.eq(1),
|
||||
If(last_pixel, NextState("TO_MEMORY"))
|
||||
)
|
||||
)
|
||||
fsm.act("TO_MEMORY",
|
||||
self._bus_accessor.address_data.stb.eq(1),
|
||||
If(self._bus_accessor.address_data.ack,
|
||||
count_word.eq(1),
|
||||
If(last_word,
|
||||
NextState("EOF")
|
||||
).Else(
|
||||
NextState("TRANSFER_PIXEL")
|
||||
If(last_word, NextState("EOF"))
|
||||
)
|
||||
)
|
||||
)
|
||||
|
|
Loading…
Reference in New Issue