phy/xgmii: handle IFG insertion in PHY, support deficit idle count

Because XGMII only allows start of frame characters to be placed on
lane 0 (first octet in a 32-bit XGMII bus word), when a packet's
length % 4 != 0, we can't transmit exactly 12 XGMII idle characters
inter-frame gap (the XGMII end of frame character counts towards the
inter-frame gap, while start of frame does not). Given we are required
to transmit a minimum of 12 bytes IFG, it's allowed to send packet
length % 4 bytes additional IFG bytes. However this would waste
precious bandwidth transmitting these characters.

Thus, 10Gbit/s Ethernet and above allow using the deficit idle count
mechanism. It allows to delete some idle characters, as long as an
average count of >= 12 bytes IFG is maintained. This is to be
implemented as a two bit counter as specified in IEEE802.3-2018,
section four, 46.3.1.4 Start control character alignment.

In practice, the previous implementation of the LiteEthPHYXGMIITX made
these issues even more prevalent: because the internal stream
interface is 64-bit wide and stream transactions always start aligned
to the first octet in a bus word, the previous primitive TX
implementation always started transmission on the first octet in the
64-bit XGMII bus word. The IFG inserter operated independently of the
PHY and thus made sure to maintain 12 bytes of IFG on the 64-bit
stream bus. This means that in a worst case scenario, the IFG could
grow to 23 octets. In applications such as Ethernet switches, the
consequences would be frequent buffer overruns or corrupt
transmissions.

Hence this commit introduces an IFG inserter in the LiteEthPHYXGMIITX
module itself. It is significantly more complex compared to the gap
inserter, but inserts the smallest legal gap as defined by
IEEE802.3. Furthermore, it optionally implements the deficit idle
count algorithm as described by Eric Lynskey of the UNH
InterOperability Lab[1] to achieve an average IFG of 12 bytes.

[1]: https://www.iol.unh.edu/sites/default/files/knowledgebase/10gec/10GbE_DIC.pdf

Signed-off-by: Leon Schuermann <leon@is.currently.online>
This commit is contained in:
Leon Schuermann 2021-11-17 20:50:06 +01:00
parent 1bbd90ae4d
commit ea0a65d357
2 changed files with 382 additions and 31 deletions

View File

@ -125,7 +125,8 @@ class LiteEthMACCore(Module, AutoCSR):
if core_dw != 8:
tx_datapath.add_last_be()
# Gap insertion has to occur in phy tx domain to ensure gap is correctly maintained
tx_datapath.add_gap()
if not getattr(phy, "integrated_ifg_inserter", False):
tx_datapath.add_gap()
tx_datapath.pipeline.append(phy)
self.submodules.tx_datapath = tx_datapath

View File

@ -17,66 +17,382 @@ XGMII_START = Constant(0xFB, bits_sign=8)
XGMII_END = Constant(0xFD, bits_sign=8)
class LiteEthPHYXGMIITX(Module):
def __init__(self, pads, dw):
def __init__(self, pads, dw, dic=True):
# Enforce 64-bit data path
assert dw == 64
# Sink for data to transmit
self.sink = sink = stream.Endpoint(eth_phy_description(dw))
# ---------- Generic signals ----------
# Masked last_be signal of current clock cycle. last_be should only be
# respected when last is also asserted.
masked_last_be = Signal.like(sink.last_be)
self.comb += [
If(sink.last,
masked_last_be.eq(sink.last_be),
),
]
# ---------- Inter-frame gap state ----------
# State to keep track of the current inter-frame gap we are required to
# maintain. We must take care to always have an inter-frame gap of at
# least 96 bits (12 bytes), with an exception for the deficit idle gap
# mechanism. Because XGMII transactions can only start on the first or
# fifth byte in a 64-bit bus word, it's sufficient to represent this as
# - 0: less than 4 bytes of IFG transmitted
# - 1: less than 8 bytes of IFG transmitted
# - 2: less than 12 bytes of IFG transmitted
# - 3: 12 or more bytes of IFG transmitted
current_ifg = Signal(max=4, reset=3)
next_ifg = Signal(max=4)
# Shortcut "functions" to add a 32-bit or 64-bit idle bus word to the
# current inter-frame gap without worrying about wrapping, or to reset it
# (typically on the start of a new transmission). This can be useful to
# see the effect on the next_ifg value and thus make decisions about
# subsequent signal states (e.g. sink.ready).
ifg_reset = Signal()
ifg_add_double = Signal()
ifg_add_single = Signal()
self.comb += [
If(ifg_reset,
next_ifg.eq(0),
).Elif(ifg_add_single,
If(current_ifg < 3,
next_ifg.eq(current_ifg + 1)
),
).Elif(ifg_add_double,
If(current_ifg < 2,
next_ifg.eq(current_ifg + 2),
).Else(
next_ifg.eq(3),
),
).Else(
next_ifg.eq(current_ifg),
),
]
self.sync += current_ifg.eq(next_ifg)
# ---------- Deficit idle count mechanism state ----------
# Because XGMII only allows start of frame characters to be placed on
# lane 0 (first and fifth octet in a 64-bit bus word), when a packet's
# length % 4 != 0, we can't transmit exactly 12 XGMII idle characters
# inter-frame gap (the XGMII end of frame character counts towards the
# inter-frame gap, while start of frame does not). Given we are required
# to transmit a minimum of 12 bytes IFG, it's allowed to send packet
# length % 4 bytes additional IFG bytes. However this would waste
# precious bandwidth transmitting these characters.
#
# Thus, 10Gbit/s Ethernet and above allow using the deficit idle count
# mechanism. It allows to delete some idle characters, as long as an
# average count of >= 12 bytes IFG is maintained. This is to be
# implemented as a two bit counter as specified in IEEE802.3-2018,
# section four, 46.3.1.4 Start control character alignment.
#
# This module implements the deficit idle count algorithm as described
# by Eric Lynskey of the UNH InterOperability Lab[1]:
#
# | current | | | | |
# | count | 0 | 1 | 2 | 3 |
# |---------+-----+-------+-----+-------+-----+-------+-----+-------|
# | | | new | | new | | new | | new |
# | pkt % 4 | IFG | count | IFG | count | IFG | count | IFG | count |
# |---------+-----+-------+-----+-------+-----+-------+-----+-------|
# | 0 | 12 | 0 | 12 | 1 | 12 | 2 | 12 | 3 |
# | 1 | 11 | 1 | 11 | 2 | 11 | 3 | 15 | 0 |
# | 2 | 10 | 2 | 10 | 3 | 14 | 0 | 14 | 1 |
# | 3 | 9 | 3 | 13 | 0 | 13 | 1 | 13 | 2 |
#
# [1]: https://www.iol.unh.edu/sites/default/files/knowledgebase/10gec/10GbE_DIC.pdf
# Additional state to keep track of exactly how many bytes % 4 we've
# transmitted in the last packet. We need this information to judge
# whether we've had a sufficiently large IFG given the current DIC
# count. Value is in the range [0; 3].
#
# If we disable the deficit idle count, we replace this with a constant
# of 0, meaning that we pretend to not have transmitted any additional
# IDLE characters. This should allow significant logic optimizations
# while having the same effect as not implementing DIC at all.
if dic:
last_packet_rem = Signal(max=4)
else:
last_packet_rem = Constant(0, bits_sign=2)
# Bounded counter of deleted XGMII idle characters. Must be within [0;
# 3]. If we disable the deficit idle count mechanism, this signal should
# not change. However, it's still present to avoid the logic below
# getting too complex.
current_dic = Signal(max=4, reset=3)
# ---------- Shifted transmit state ----------
# Whether the current transmission is shifted, meaning that the packet's
# transmission started on the fifth octet within the 64-bit bus
# word. As a consequence of the shifted transmission, given that we
# receive 64 valid bits from the sink, we need to store and delay the
# upper half of the current clock cycle's data to the next.
#
# This register is to be set when transitioning out of the IDLE
# state.
transmit_shifted = Signal()
# Upper half of the data of the previous clock cycle.
prev_valid_data = Signal(dw)
prev_valid_last_be = Signal(dw // 8)
self.sync += [
If(sink.valid & sink.ready,
prev_valid_data.eq(sink.data),
If(sink.last,
prev_valid_last_be.eq(masked_last_be)
).Else(
prev_valid_last_be.eq(0),
),
),
]
# Previous clock cycle sink valid signal
prev_valid = Signal()
self.sync += prev_valid.eq(sink.valid)
# Adjusted sink data & last_be. If our transmission is shifted, this
# will contain the upper-half of the previous and lower-half of the
# current clock cycle. Otherwise, simply equal to data and the masked
# last_be.
adjusted_sink_valid = Signal()
adjusted_sink_valid_data = Signal.like(sink.data)
adjusted_sink_valid_last_be = Signal.like(sink.last_be)
self.comb += [
If(transmit_shifted,
# Because we are injecting data from the previous cycle, we need
# to respect its valid. It's fine that adjusted_sink_valid
# therefore is deasserted for the very first bus word, given
# this is handled in the IDLE fsm state still. This assumes a
# non-hostile sink where valid is constantly asserted during a
# single transmission.
adjusted_sink_valid.eq(prev_valid),
adjusted_sink_valid_data.eq(Cat(
prev_valid_data[(dw // 2):],
sink.data[:(dw // 2)],
)),
adjusted_sink_valid_last_be.eq(Cat(
prev_valid_last_be[(dw // 8 // 2):],
masked_last_be[:(dw // 8 // 2)],
)),
).Else(
adjusted_sink_valid.eq(sink.valid),
adjusted_sink_valid_data.eq(sink.data),
adjusted_sink_valid_last_be.eq(masked_last_be),
),
]
# ---------- XGMII transmission logic ----------
# Transmit FSM
self.submodules.fsm = fsm = FSM(reset_state="IDLE")
# This block will be executed by the FSM below in the IDLE state, when
# it's time to start a transmission aligned on the FIRST byte in a
# 64-bit bus word. This can happen both because we've waited the 12 byte
# IFG and coincidentally the first byte is the next valid start point,
# or because we reduced the IFG to 8 bytes because of the deficit idle
# count mechanism. Thus have it as a reusable component here.
unshifted_idle_transmit = [
# Currently idling, but a new frame is ready for transmission
# and we had at least the full IFG idle before. Thus transmit
# the preamble, but replace the first byte with the XGMII start
# of frame control character. Accept more data.
ifg_reset.eq(1),
pads.tx_ctl.eq(0x01),
pads.tx_data.eq(Cat(XGMII_START, sink.data[8:dw])),
NextValue(transmit_shifted, 0),
NextValue(sink.ready, 1),
NextState("TRANSMIT"),
]
# This block will be executed by the FSM below in the IDLE state, when
# it's time to start a transmission aligned on the FIFTH byte in a
# 64-bit bus word. This can happen either because we've waited the 8
# byte IFG and need to insert only four bytes more in this cycle, or
# because the deficit idle count mechanism allows transmit with a
# smaller IFG (e.g. 1 byte packet remainder + 4 bytes TRANSMIT IFG in
# previous cycle + 4 bytes IDLE IFG in current cycle = 9 bytes total
# IFG).
shifted_idle_transmit = [
# Currently idling, but a new frame is ready for transmission and
# there is only 4 bytes missing in the IFG (or we have created an
# acceptable IFG deficit). Thus transmit the preamble on the second
# 32-bit bus word, but replace the first byte with the XGMII start
# of frame control character. Accept more data.
pads.tx_ctl.eq(0x1F),
pads.tx_data.eq(Cat(
Replicate(XGMII_IDLE, 4),
XGMII_START,
sink.data[8:(dw // 2)],
)),
ifg_reset.eq(1),
NextValue(transmit_shifted, 1),
NextValue(sink.ready, 1),
NextState("TRANSMIT"),
]
fsm.act("IDLE",
If(sink.valid,
# Currently idling, but a new frame is ready for
# transmission. Thus transmit the preamble, but replace the
# first byte with the XGMII start of frame control
# character. Accept more data.
pads.tx_ctl.eq(0x01),
pads.tx_data.eq(Cat(XGMII_START, sink.data[8:dw])),
NextValue(sink.ready, 1),
NextState("TRANSMIT"),
If(sink.valid & (current_ifg == 3),
# Branch A: we've transmitted at least the full 12 bytes
# IFG. This means that we can unconditionally start transmission
# on the first octet. In addition to that, we may have inserted
# some extra XGMII idle characters, thus we can reduce the deficit.
*unshifted_idle_transmit,
If(current_dic - last_packet_rem < 0,
NextValue(current_dic, 0),
).Else(
NextValue(current_dic, current_dic - last_packet_rem),
)
).Elif(sink.valid & (current_ifg == 2),
# Branch B: we've transmitted at least 8 bytes of IFG. This
# means that, depending on the DIC, we can start transmission on
# either the first or fifth octet. Manipulate the DIC
# count accordingly.
If((last_packet_rem != 0)
& (current_dic + last_packet_rem <= 3),
# We've taken some extra IFG bytes (added to the deficit)
*unshifted_idle_transmit,
NextValue(current_dic, current_dic + last_packet_rem),
).Else(
# We might have inserted some extra IFG bytes (subtracted
# from the deficit)
*shifted_idle_transmit,
If(current_dic - last_packet_rem < 0,
NextValue(current_dic, 0),
).Else(
NextValue(current_dic, current_dic - last_packet_rem),
)
),
).Elif(sink.valid & (current_ifg == 1) & (last_packet_rem != 0)
& (current_dic + last_packet_rem <= 3),
# Branch C: we've transmitted at least 4 bytes of IFG. Whether
# we can start a new transmission here depends on the DIC. In
# any case, we're deleting at least one XGMII idle character,
# which we need to keep track of. Furthermore, transmission can
# only ever start on the fifth octet here.
*shifted_idle_transmit,
NextValue(current_dic, current_dic + last_packet_rem),
).Else(
# Idling, transmit XGMII IDLE control characters
# only. Accept more data.
# Idling, transmit XGMII IDLE control characters only and add
# them to the IFG.
pads.tx_ctl.eq(0xFF),
pads.tx_data.eq(Cat(*([XGMII_IDLE] * 8))),
NextValue(sink.ready, 1),
ifg_add_double.eq(1),
# Accept more data if we've had a sufficiently large inter-frame
# gap (accounting for deficit idle count). For this we need to
# determine whether the next sink.valid clock cycle will take a
# given branch of A, B or C.
If((next_ifg >= 2)
| ((next_ifg == 1) & (last_packet_rem != 0)
& (current_dic + last_packet_rem <= 3)),
# Branch A, B or C will be taken as soon as sink.valid
# again, thus accept more data.
NextValue(sink.ready, 1),
).Else(
# We haven't transmitted a sufficient IFG. The next
# sink.valid clock cycle will not start a transmission.
NextValue(sink.ready, 0),
),
# If we've remained in IDLE because the sink is not yet valid,
# even though the full IFG has been sent already, remove any
# deficit idle count. We've made up for that by now.
If(current_ifg >= 2,
NextValue(current_dic, 0),
),
NextState("IDLE"),
)
)
# How many bytes % 4 we've transmitted in the current packet. This
# signal is to be asserted when the packet ends in the current clock
# cycle.
#
# If we disable the deficit idle count, we replace this with a constant
# of 0, meaning that we pretend to not have transmitted any additional
# IDLE characters. This should allow significant logic optimizations.
if dic:
current_packet_rem = Signal(max=4)
else:
current_packet_rem = Constant(0, bits_sign=2)
# Whether the current transmission must be ended in the next clock
# cycle. This might be required if we haven't transmitted the XGMII end
# of frame control character, but have sent all other data of the packet.
end_transmission = Signal()
fsm.act("TRANSMIT",
# Check whether the data is still valid first or we are not
# ready to accept a new transmission.
If(~sink.valid | ~sink.ready,
# Data isn't valid, or we aren't ready to accept a new
# transmission yet as another one has ended but the XGMII end of
# frame control character has not been transmitted. We must
# transmit the end of frame marker and return to IDLE
# afterwards. Immediately accept more data, given we have
# transmitted the end of frame control character.
If(end_transmission | ~adjusted_sink_valid,
# Data isn't valid, but we're still in the transmit state. This
# can happen because we've finished transmitting all packet
# data, but must still transmit the XGMII end of frame control
# character. Thus put this control character and IDLE on the
# line, return to IDLE afterwards.
pads.tx_ctl.eq(0xFF),
pads.tx_data.eq(Cat(XGMII_END, Replicate(XGMII_IDLE, 7))),
# Also, we're transmitting 64 bits worth of idle characters.
ifg_add_double.eq(1),
# We're transmitting 8 bytes of IFG in this cycle. Thus we know
# that in the next cycle we can for sure start a new
# transmission, irrespective of whether we use DIC (either on
# the first or fifth byte in the 64-bit word). Thus set
# sink.ready accordingly.
NextValue(sink.ready, 1),
# Packet transmission is complete, return to IDLE and reset the
# end_transmission register.
NextValue(end_transmission, 0),
NextState("IDLE"),
).Else(
# The data is valid. For each byte, determine whether it is
# valid or must be an XGMII idle or end of frame control
# character based on the value of last_be.
*[
If(~sink.last | (sink.last_be >= (1 << i)),
If((adjusted_sink_valid_last_be == 0)
| (adjusted_sink_valid_last_be >= (1 << i)),
# Either not the last data word or last_be indicates
# this byte is still valid
pads.tx_ctl[i].eq(0),
pads.tx_data[8*i:8*(i+1)].eq(sink.data[8*i:8*(i+1)]),
).Elif((sink.last_be == (1 << (i - 1))) if i > 0 else 0,
pads.tx_data[8*i:8*(i+1)].eq(
adjusted_sink_valid_data[8*i:8*(i+1)]
),
).Elif((adjusted_sink_valid_last_be == (1 << (i - 1)))
if i > 0 else 0,
# last_be indicates that this byte is the first one
# which is no longer valid, hence transmit the XGMII end
# of frame character
pads.tx_ctl[i].eq(1),
pads.tx_data[8*i:8*(i+1)].eq(XGMII_END),
# Also, starting from this character, the inter-frame
# gap starts. Depending on where we are in the bus word
# (index 0 to 4) we can already count this cycle as one
# 32-bit IFG step (the XGMII end of frame character
# counts towards the IFG).
If(i < 5,
ifg_add_single.eq(1),
),
# If the DIC mechanism is enabled, furthermore keep
# track of the remainder (mod 4) of IDLE bytes being
# sent.
*([
current_packet_rem.eq(i % 4),
NextValue(last_packet_rem, i % 4),
] if dic else []),
).Else(
# We must've transmitted the XGMII end of frame control
# character, all other bytes must be XGMII idle control
@ -93,15 +409,42 @@ class LiteEthPHYXGMIITX(Module):
# XGMII bus word containing the XGMII end of frame and idle
# control characters. This happens if we remain in the TRANSMIT
# state.
If(~sink.last,
NextValue(sink.ready, 1),
If(adjusted_sink_valid_last_be == 0,
# This hasn't been the last bus word. However, before we can
# tell the data sink to send us additional data, in case
# we're performing a shifted transmission, we must see
# whether the current sink data word already indicates the
# end of data in its upper half. If so, we must not request
# additional data. Otherwise we could lose valid data, as
# we're transmitting the IFG first.
If(transmit_shifted & sink.last
& ((sink.last_be & 0xF0) != 0),
# We're in a shifted transmit and already have received
# the last data bytes from the sink.
NextValue(sink.ready, 0),
).Else(
# Everything's good, the sink hasn't yet asserted last.
NextValue(sink.ready, 1),
),
NextState("TRANSMIT"),
).Elif(sink.last_be == (1 << 7),
# Last data word, but all bytes were valid.
).Elif(adjusted_sink_valid_last_be == (1 << 7),
# Last data word, but all bytes were valid. Thus we still
# need to transmit the XGMII end control character.
NextValue(end_transmission, 1),
NextValue(sink.ready, 0),
NextState("TRANSMIT"),
).Else(
NextValue(sink.ready, 1),
# We did already transmit the XGMII end control
# character. Depending on the inter-frame gap sent as part of
# this cycle and the current deficit idle count, we might
# already be able to accept data in the next clock cycle.
If((next_ifg >= 2)
| ((next_ifg == 1) & (last_packet_rem != 0)
& (current_dic + last_packet_rem <= 3)),
NextValue(sink.ready, 1),
).Else(
NextValue(sink.ready, 0),
),
NextState("IDLE"),
)
)
@ -310,12 +653,19 @@ class LiteEthPHYXGMII(Module, AutoCSR):
pads,
model=False,
dw=64,
with_hw_init_reset=True):
with_hw_init_reset=True,
dic=True,
):
self.dw = dw
self.cd_eth_tx, self.cd_eth_rx = "eth_tx", "eth_rx"
self.integrated_ifg_inserter = True
self.submodules.crg = LiteEthPHYXGMIICRG(clock_pads, model)
self.submodules.tx = ClockDomainsRenamer(self.cd_eth_tx)(
LiteEthPHYXGMIITX(pads, self.dw))
LiteEthPHYXGMIITX(
pads,
self.dw,
dic=dic,
))
self.submodules.rx = ClockDomainsRenamer(self.cd_eth_rx)(
LiteEthPHYXGMIIRX(pads, self.dw))
self.sink, self.source = self.tx.sink, self.rx.source