From bedbef8ad3e75a304af6361af235302bcc61d06b Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Tue, 14 May 2024 06:39:01 +0000
Subject: [PATCH] 修改内核路径
---
kernel/drivers/net/ethernet/intel/igc/igc_main.c | 296 ++++++++++++++++++++++++++++++++++++++++++++++------------
1 files changed, 234 insertions(+), 62 deletions(-)
diff --git a/kernel/drivers/net/ethernet/intel/igc/igc_main.c b/kernel/drivers/net/ethernet/intel/igc/igc_main.c
index e7ffe63..631ce79 100644
--- a/kernel/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/kernel/drivers/net/ethernet/intel/igc/igc_main.c
@@ -600,7 +600,6 @@
/* disable the queue */
wr32(IGC_TXDCTL(reg_idx), 0);
wrfl();
- mdelay(10);
wr32(IGC_TDLEN(reg_idx),
ring->count * sizeof(union igc_adv_tx_desc));
@@ -898,25 +897,118 @@
return netdev_mc_count(netdev);
}
-static __le32 igc_tx_launchtime(struct igc_adapter *adapter, ktime_t txtime)
+static __le32 igc_tx_launchtime(struct igc_ring *ring, ktime_t txtime,
+ bool *first_flag, bool *insert_empty)
{
+ struct igc_adapter *adapter = netdev_priv(ring->netdev);
ktime_t cycle_time = adapter->cycle_time;
ktime_t base_time = adapter->base_time;
- u32 launchtime;
+ ktime_t now = ktime_get_clocktai();
+ ktime_t baset_est, end_of_cycle;
+ s32 launchtime;
+ s64 n;
- /* FIXME: when using ETF together with taprio, we may have a
- * case where 'delta' is larger than the cycle_time, this may
- * cause problems if we don't read the current value of
- * IGC_BASET, as the value writen into the launchtime
- * descriptor field may be misinterpreted.
+ n = div64_s64(ktime_sub_ns(now, base_time), cycle_time);
+
+ baset_est = ktime_add_ns(base_time, cycle_time * (n));
+ end_of_cycle = ktime_add_ns(baset_est, cycle_time);
+
+ if (ktime_compare(txtime, end_of_cycle) >= 0) {
+ if (baset_est != ring->last_ff_cycle) {
+ *first_flag = true;
+ ring->last_ff_cycle = baset_est;
+
+ if (ktime_compare(end_of_cycle, ring->last_tx_cycle) > 0)
+ *insert_empty = true;
+ }
+ }
+
+ /* Introducing a window at end of cycle on which packets
+ * potentially not honor launchtime. Window of 5us chosen
+ * considering software update the tail pointer and packets
+ * are dma'ed to packet buffer.
*/
- div_s64_rem(ktime_sub_ns(txtime, base_time), cycle_time, &launchtime);
+ if ((ktime_sub_ns(end_of_cycle, now) < 5 * NSEC_PER_USEC))
+ netdev_warn(ring->netdev, "Packet with txtime=%llu may not be honoured\n",
+ txtime);
+
+ ring->last_tx_cycle = end_of_cycle;
+
+ launchtime = ktime_sub_ns(txtime, baset_est);
+ if (launchtime > 0)
+ div_s64_rem(launchtime, cycle_time, &launchtime);
+ else
+ launchtime = 0;
return cpu_to_le32(launchtime);
}
+static int igc_init_empty_frame(struct igc_ring *ring,
+ struct igc_tx_buffer *buffer,
+ struct sk_buff *skb)
+{
+ unsigned int size;
+ dma_addr_t dma;
+
+ size = skb_headlen(skb);
+
+ dma = dma_map_single(ring->dev, skb->data, size, DMA_TO_DEVICE);
+ if (dma_mapping_error(ring->dev, dma)) {
+ netdev_err_once(ring->netdev, "Failed to map DMA for TX\n");
+ return -ENOMEM;
+ }
+
+ buffer->skb = skb;
+ buffer->protocol = 0;
+ buffer->bytecount = skb->len;
+ buffer->gso_segs = 1;
+ buffer->time_stamp = jiffies;
+ dma_unmap_len_set(buffer, len, skb->len);
+ dma_unmap_addr_set(buffer, dma, dma);
+
+ return 0;
+}
+
+static int igc_init_tx_empty_descriptor(struct igc_ring *ring,
+ struct sk_buff *skb,
+ struct igc_tx_buffer *first)
+{
+ union igc_adv_tx_desc *desc;
+ u32 cmd_type, olinfo_status;
+ int err;
+
+ if (!igc_desc_unused(ring))
+ return -EBUSY;
+
+ err = igc_init_empty_frame(ring, first, skb);
+ if (err)
+ return err;
+
+ cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT |
+ IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD |
+ first->bytecount;
+ olinfo_status = first->bytecount << IGC_ADVTXD_PAYLEN_SHIFT;
+
+ desc = IGC_TX_DESC(ring, ring->next_to_use);
+ desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+ desc->read.olinfo_status = cpu_to_le32(olinfo_status);
+ desc->read.buffer_addr = cpu_to_le64(dma_unmap_addr(first, dma));
+
+ netdev_tx_sent_queue(txring_txq(ring), skb->len);
+
+ first->next_to_watch = desc;
+
+ ring->next_to_use++;
+ if (ring->next_to_use == ring->count)
+ ring->next_to_use = 0;
+
+ return 0;
+}
+
+#define IGC_EMPTY_FRAME_SIZE 60
+
static void igc_tx_ctxtdesc(struct igc_ring *tx_ring,
- struct igc_tx_buffer *first,
+ __le32 launch_time, bool first_flag,
u32 vlan_macip_lens, u32 type_tucmd,
u32 mss_l4len_idx)
{
@@ -935,35 +1027,17 @@
if (test_bit(IGC_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
mss_l4len_idx |= tx_ring->reg_idx << 4;
+ if (first_flag)
+ mss_l4len_idx |= IGC_ADVTXD_TSN_CNTX_FIRST;
+
context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens);
context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd);
context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
-
- /* We assume there is always a valid Tx time available. Invalid times
- * should have been handled by the upper layers.
- */
- if (tx_ring->launchtime_enable) {
- struct igc_adapter *adapter = netdev_priv(tx_ring->netdev);
- ktime_t txtime = first->skb->tstamp;
-
- first->skb->tstamp = ktime_set(0, 0);
- context_desc->launch_time = igc_tx_launchtime(adapter,
- txtime);
- } else {
- context_desc->launch_time = 0;
- }
+ context_desc->launch_time = launch_time;
}
-static inline bool igc_ipv6_csum_is_sctp(struct sk_buff *skb)
-{
- unsigned int offset = 0;
-
- ipv6_find_hdr(skb, &offset, IPPROTO_SCTP, NULL, NULL);
-
- return offset == skb_checksum_start_offset(skb);
-}
-
-static void igc_tx_csum(struct igc_ring *tx_ring, struct igc_tx_buffer *first)
+static void igc_tx_csum(struct igc_ring *tx_ring, struct igc_tx_buffer *first,
+ __le32 launch_time, bool first_flag)
{
struct sk_buff *skb = first->skb;
u32 vlan_macip_lens = 0;
@@ -985,10 +1059,7 @@
break;
case offsetof(struct sctphdr, checksum):
/* validate that this is actually an SCTP request */
- if ((first->protocol == htons(ETH_P_IP) &&
- (ip_hdr(skb)->protocol == IPPROTO_SCTP)) ||
- (first->protocol == htons(ETH_P_IPV6) &&
- igc_ipv6_csum_is_sctp(skb))) {
+ if (skb_csum_is_sctp(skb)) {
type_tucmd = IGC_ADVTXD_TUCMD_L4T_SCTP;
break;
}
@@ -1006,7 +1077,8 @@
vlan_macip_lens |= skb_network_offset(skb) << IGC_ADVTXD_MACLEN_SHIFT;
vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK;
- igc_tx_ctxtdesc(tx_ring, first, vlan_macip_lens, type_tucmd, 0);
+ igc_tx_ctxtdesc(tx_ring, launch_time, first_flag,
+ vlan_macip_lens, type_tucmd, 0);
}
static int __igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
@@ -1230,6 +1302,7 @@
static int igc_tso(struct igc_ring *tx_ring,
struct igc_tx_buffer *first,
+ __le32 launch_time, bool first_flag,
u8 *hdr_len)
{
u32 vlan_macip_lens, type_tucmd, mss_l4len_idx;
@@ -1316,8 +1389,8 @@
vlan_macip_lens |= (ip.hdr - skb->data) << IGC_ADVTXD_MACLEN_SHIFT;
vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK;
- igc_tx_ctxtdesc(tx_ring, first, vlan_macip_lens,
- type_tucmd, mss_l4len_idx);
+ igc_tx_ctxtdesc(tx_ring, launch_time, first_flag,
+ vlan_macip_lens, type_tucmd, mss_l4len_idx);
return 1;
}
@@ -1325,11 +1398,14 @@
static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
struct igc_ring *tx_ring)
{
+ bool first_flag = false, insert_empty = false;
u16 count = TXD_USE_COUNT(skb_headlen(skb));
__be16 protocol = vlan_get_protocol(skb);
struct igc_tx_buffer *first;
+ __le32 launch_time = 0;
u32 tx_flags = 0;
unsigned short f;
+ ktime_t txtime;
u8 hdr_len = 0;
int tso = 0;
@@ -1343,11 +1419,40 @@
count += TXD_USE_COUNT(skb_frag_size(
&skb_shinfo(skb)->frags[f]));
- if (igc_maybe_stop_tx(tx_ring, count + 3)) {
+ if (igc_maybe_stop_tx(tx_ring, count + 5)) {
/* this is a hard error */
return NETDEV_TX_BUSY;
}
+ if (!tx_ring->launchtime_enable)
+ goto done;
+
+ txtime = skb->tstamp;
+ skb->tstamp = ktime_set(0, 0);
+ launch_time = igc_tx_launchtime(tx_ring, txtime, &first_flag, &insert_empty);
+
+ if (insert_empty) {
+ struct igc_tx_buffer *empty_info;
+ struct sk_buff *empty;
+ void *data;
+
+ empty_info = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
+ empty = alloc_skb(IGC_EMPTY_FRAME_SIZE, GFP_ATOMIC);
+ if (!empty)
+ goto done;
+
+ data = skb_put(empty, IGC_EMPTY_FRAME_SIZE);
+ memset(data, 0, IGC_EMPTY_FRAME_SIZE);
+
+ igc_tx_ctxtdesc(tx_ring, 0, false, 0, 0, 0);
+
+ if (igc_init_tx_empty_descriptor(tx_ring,
+ empty,
+ empty_info) < 0)
+ dev_kfree_skb_any(empty);
+ }
+
+done:
/* record the location of the first descriptor for this packet */
first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
first->skb = skb;
@@ -1361,9 +1466,10 @@
* the other timer registers before skipping the
* timestamping request.
*/
- if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON &&
- !test_and_set_bit_lock(__IGC_PTP_TX_IN_PROGRESS,
- &adapter->state)) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&adapter->ptp_tx_lock, flags);
+ if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON && !adapter->ptp_tx_skb) {
skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
tx_flags |= IGC_TX_FLAGS_TSTAMP;
@@ -1372,17 +1478,19 @@
} else {
adapter->tx_hwtstamp_skipped++;
}
+
+ spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags);
}
/* record initial flags and protocol */
first->tx_flags = tx_flags;
first->protocol = protocol;
- tso = igc_tso(tx_ring, first, &hdr_len);
+ tso = igc_tso(tx_ring, first, launch_time, first_flag, &hdr_len);
if (tso < 0)
goto out_drop;
else if (!tso)
- igc_tx_csum(tx_ring, first);
+ igc_tx_csum(tx_ring, first, launch_time, first_flag);
igc_tx_map(tx_ring, first, hdr_len);
@@ -1463,14 +1571,36 @@
le32_to_cpu(rx_desc->wb.upper.status_error));
}
+/* Mapping HW RSS Type to enum pkt_hash_types */
+static const enum pkt_hash_types igc_rss_type_table[IGC_RSS_TYPE_MAX_TABLE] = {
+ [IGC_RSS_TYPE_NO_HASH] = PKT_HASH_TYPE_L2,
+ [IGC_RSS_TYPE_HASH_TCP_IPV4] = PKT_HASH_TYPE_L4,
+ [IGC_RSS_TYPE_HASH_IPV4] = PKT_HASH_TYPE_L3,
+ [IGC_RSS_TYPE_HASH_TCP_IPV6] = PKT_HASH_TYPE_L4,
+ [IGC_RSS_TYPE_HASH_IPV6_EX] = PKT_HASH_TYPE_L3,
+ [IGC_RSS_TYPE_HASH_IPV6] = PKT_HASH_TYPE_L3,
+ [IGC_RSS_TYPE_HASH_TCP_IPV6_EX] = PKT_HASH_TYPE_L4,
+ [IGC_RSS_TYPE_HASH_UDP_IPV4] = PKT_HASH_TYPE_L4,
+ [IGC_RSS_TYPE_HASH_UDP_IPV6] = PKT_HASH_TYPE_L4,
+ [IGC_RSS_TYPE_HASH_UDP_IPV6_EX] = PKT_HASH_TYPE_L4,
+ [10] = PKT_HASH_TYPE_NONE, /* RSS Type above 9 "Reserved" by HW */
+ [11] = PKT_HASH_TYPE_NONE, /* keep array sized for SW bit-mask */
+ [12] = PKT_HASH_TYPE_NONE, /* to handle future HW revisons */
+ [13] = PKT_HASH_TYPE_NONE,
+ [14] = PKT_HASH_TYPE_NONE,
+ [15] = PKT_HASH_TYPE_NONE,
+};
+
static inline void igc_rx_hash(struct igc_ring *ring,
union igc_adv_rx_desc *rx_desc,
struct sk_buff *skb)
{
- if (ring->netdev->features & NETIF_F_RXHASH)
- skb_set_hash(skb,
- le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
- PKT_HASH_TYPE_L3);
+ if (ring->netdev->features & NETIF_F_RXHASH) {
+ u32 rss_hash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
+ u32 rss_type = igc_rss_type(rx_desc);
+
+ skb_set_hash(skb, rss_hash, igc_rss_type_table[rss_type]);
+ }
}
/**
@@ -4756,9 +4886,10 @@
return false;
for (n = 0; n < qopt->num_entries; n++) {
- const struct tc_taprio_sched_entry *e;
+ const struct tc_taprio_sched_entry *e, *prev;
int i;
+ prev = n ? &qopt->entries[n - 1] : NULL;
e = &qopt->entries[n];
/* i225 only supports "global" frame preemption
@@ -4767,13 +4898,18 @@
if (e->command != TC_TAPRIO_CMD_SET_GATES)
return false;
- for (i = 0; i < adapter->num_tx_queues; i++) {
- if (e->gate_mask & BIT(i))
+ for (i = 0; i < adapter->num_tx_queues; i++)
+ if (e->gate_mask & BIT(i)) {
queue_uses[i]++;
- if (queue_uses[i] > 1)
- return false;
- }
+ /* There are limitations: A single queue cannot
+ * be opened and closed multiple times per cycle
+ * unless the gate stays open. Check for it.
+ */
+ if (queue_uses[i] > 1 &&
+ !(prev->gate_mask & BIT(i)))
+ return false;
+ }
}
return true;
@@ -4798,13 +4934,18 @@
static int igc_save_qbv_schedule(struct igc_adapter *adapter,
struct tc_taprio_qopt_offload *qopt)
{
+ bool queue_configured[IGC_MAX_TX_QUEUES] = { };
u32 start_time = 0, end_time = 0;
size_t n;
+ int i;
if (!qopt->enable) {
adapter->base_time = 0;
return 0;
}
+
+ if (qopt->base_time < 0)
+ return -ERANGE;
if (adapter->base_time)
return -EALREADY;
@@ -4815,14 +4956,25 @@
adapter->cycle_time = qopt->cycle_time;
adapter->base_time = qopt->base_time;
- /* FIXME: be a little smarter about cases when the gate for a
- * queue stays open for more than one entry.
- */
for (n = 0; n < qopt->num_entries; n++) {
struct tc_taprio_sched_entry *e = &qopt->entries[n];
- int i;
end_time += e->interval;
+
+ /* If any of the conditions below are true, we need to manually
+ * control the end time of the cycle.
+ * 1. Qbv users can specify a cycle time that is not equal
+ * to the total GCL intervals. Hence, recalculation is
+ * necessary here to exclude the time interval that
+ * exceeds the cycle time.
+ * 2. According to IEEE Std. 802.1Q-2018 section 8.6.9.2,
+ * once the end of the list is reached, it will switch
+ * to the END_OF_CYCLE state and leave the gates in the
+ * same state until the next cycle is started.
+ */
+ if (end_time > adapter->cycle_time ||
+ n + 1 == qopt->num_entries)
+ end_time = adapter->cycle_time;
for (i = 0; i < adapter->num_tx_queues; i++) {
struct igc_ring *ring = adapter->tx_ring[i];
@@ -4830,11 +4982,30 @@
if (!(e->gate_mask & BIT(i)))
continue;
- ring->start_time = start_time;
+ /* Check whether a queue stays open for more than one
+ * entry. If so, keep the start and advance the end
+ * time.
+ */
+ if (!queue_configured[i])
+ ring->start_time = start_time;
ring->end_time = end_time;
+
+ queue_configured[i] = true;
}
start_time += e->interval;
+ }
+
+ /* Check whether a queue gets configured.
+ * If not, set the start and end time to be end time.
+ */
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ if (!queue_configured[i]) {
+ struct igc_ring *ring = adapter->tx_ring[i];
+
+ ring->start_time = end_time;
+ ring->end_time = end_time;
+ }
}
return 0;
@@ -5110,6 +5281,7 @@
netdev->features |= NETIF_F_TSO;
netdev->features |= NETIF_F_TSO6;
netdev->features |= NETIF_F_TSO_ECN;
+ netdev->features |= NETIF_F_RXHASH;
netdev->features |= NETIF_F_RXCSUM;
netdev->features |= NETIF_F_HW_CSUM;
netdev->features |= NETIF_F_SCTP_CRC;
--
Gitblit v1.6.2