From 1543e317f1da31b75942316931e8f491a8920811 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Thu, 04 Jan 2024 10:08:02 +0000
Subject: [PATCH] disable FB
---
kernel/drivers/net/ethernet/intel/i40e/i40e_txrx.c | 575 ++++++++++++++++++++++++++++++++------------------------
1 files changed, 326 insertions(+), 249 deletions(-)
diff --git a/kernel/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/kernel/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 9ccbcd8..43be33d 100644
--- a/kernel/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/kernel/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -2,22 +2,13 @@
/* Copyright(c) 2013 - 2018 Intel Corporation. */
#include <linux/prefetch.h>
-#include <net/busy_poll.h>
#include <linux/bpf_trace.h>
#include <net/xdp.h>
#include "i40e.h"
#include "i40e_trace.h"
#include "i40e_prototype.h"
-
-static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
- u32 td_tag)
-{
- return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
- ((u64)td_cmd << I40E_TXD_QW1_CMD_SHIFT) |
- ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
- ((u64)size << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
- ((u64)td_tag << I40E_TXD_QW1_L2TAG1_SHIFT));
-}
+#include "i40e_txrx_common.h"
+#include "i40e_xsk.h"
#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
/**
@@ -530,28 +521,29 @@
/**
* i40e_fd_handle_status - check the Programming Status for FD
* @rx_ring: the Rx ring for this descriptor
- * @rx_desc: the Rx descriptor for programming Status, not a packet descriptor.
+ * @qword0_raw: qword0
+ * @qword1: qword1 after le_to_cpu
* @prog_id: the id originally used for programming
*
* This is used to verify if the FD programming or invalidation
* requested by SW to the HW is successful or not and take actions accordingly.
**/
-static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
- union i40e_rx_desc *rx_desc, u8 prog_id)
+static void i40e_fd_handle_status(struct i40e_ring *rx_ring, u64 qword0_raw,
+ u64 qword1, u8 prog_id)
{
struct i40e_pf *pf = rx_ring->vsi->back;
struct pci_dev *pdev = pf->pdev;
+ struct i40e_16b_rx_wb_qw0 *qw0;
u32 fcnt_prog, fcnt_avail;
u32 error;
- u64 qw;
- qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
- error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
+ qw0 = (struct i40e_16b_rx_wb_qw0 *)&qword0_raw;
+ error = (qword1 & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;
if (error == BIT(I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) {
- pf->fd_inv = le32_to_cpu(rx_desc->wb.qword0.hi_dword.fd_id);
- if ((rx_desc->wb.qword0.hi_dword.fd_id != 0) ||
+ pf->fd_inv = le32_to_cpu(qw0->hi_dword.fd_id);
+ if (qw0->hi_dword.fd_id != 0 ||
(I40E_DEBUG_FD & pf->hw.debug_mask))
dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n",
pf->fd_inv);
@@ -569,7 +561,7 @@
/* store the current atr filter count */
pf->fd_atr_cnt = i40e_get_current_atr_cnt(pf);
- if ((rx_desc->wb.qword0.hi_dword.fd_id == 0) &&
+ if (qw0->hi_dword.fd_id == 0 &&
test_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state)) {
/* These set_bit() calls aren't atomic with the
* test_bit() here, but worse case we potentially
@@ -598,7 +590,7 @@
} else if (error == BIT(I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
if (I40E_DEBUG_FD & pf->hw.debug_mask)
dev_info(&pdev->dev, "ntuple filter fd_id = %d, could not be removed\n",
- rx_desc->wb.qword0.hi_dword.fd_id);
+ qw0->hi_dword.fd_id);
}
}
@@ -644,13 +636,18 @@
unsigned long bi_size;
u16 i;
- /* ring already cleared, nothing to do */
- if (!tx_ring->tx_bi)
- return;
+ if (ring_is_xdp(tx_ring) && tx_ring->xsk_pool) {
+ i40e_xsk_clean_tx_ring(tx_ring);
+ } else {
+ /* ring already cleared, nothing to do */
+ if (!tx_ring->tx_bi)
+ return;
- /* Free all the Tx ring sk_buffs */
- for (i = 0; i < tx_ring->count; i++)
- i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);
+ /* Free all the Tx ring sk_buffs */
+ for (i = 0; i < tx_ring->count; i++)
+ i40e_unmap_and_free_tx_resource(tx_ring,
+ &tx_ring->tx_bi[i]);
+ }
bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
memset(tx_ring->tx_bi, 0, bi_size);
@@ -767,8 +764,6 @@
}
}
-#define WB_STRIDE 4
-
/**
* i40e_clean_tx_irq - Reclaim resources after transmit completes
* @vsi: the VSI we care about
@@ -780,7 +775,7 @@
static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
struct i40e_ring *tx_ring, int napi_budget)
{
- u16 i = tx_ring->next_to_clean;
+ int i = tx_ring->next_to_clean;
struct i40e_tx_buffer *tx_buf;
struct i40e_tx_desc *tx_head;
struct i40e_tx_desc *tx_desc;
@@ -873,27 +868,8 @@
i += tx_ring->count;
tx_ring->next_to_clean = i;
- u64_stats_update_begin(&tx_ring->syncp);
- tx_ring->stats.bytes += total_bytes;
- tx_ring->stats.packets += total_packets;
- u64_stats_update_end(&tx_ring->syncp);
- tx_ring->q_vector->tx.total_bytes += total_bytes;
- tx_ring->q_vector->tx.total_packets += total_packets;
-
- if (tx_ring->flags & I40E_TXR_FLAGS_WB_ON_ITR) {
- /* check to see if there are < 4 descriptors
- * waiting to be written back, then kick the hardware to force
- * them to be written back in case we stay in NAPI.
- * In this mode on X722 we do not enable Interrupt.
- */
- unsigned int j = i40e_get_tx_pending(tx_ring, false);
-
- if (budget &&
- ((j / WB_STRIDE) == 0) && (j > 0) &&
- !test_bit(__I40E_VSI_DOWN, vsi->state) &&
- (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
- tx_ring->arm_wb = true;
- }
+ i40e_update_tx_stats(tx_ring, total_packets, total_bytes);
+ i40e_arm_wb(tx_ring, vsi, budget);
if (ring_is_xdp(tx_ring))
return !!budget;
@@ -1220,6 +1196,11 @@
rc->total_packets = 0;
}
+static struct i40e_rx_buffer *i40e_rx_bi(struct i40e_ring *rx_ring, u32 idx)
+{
+ return &rx_ring->rx_bi[idx];
+}
+
/**
* i40e_reuse_rx_page - page flip buffer and store it back on the ring
* @rx_ring: rx descriptor ring to store buffers on
@@ -1233,7 +1214,7 @@
struct i40e_rx_buffer *new_buff;
u16 nta = rx_ring->next_to_alloc;
- new_buff = &rx_ring->rx_bi[nta];
+ new_buff = i40e_rx_bi(rx_ring, nta);
/* update, and store next to alloc */
nta++;
@@ -1244,65 +1225,35 @@
new_buff->page = old_buff->page;
new_buff->page_offset = old_buff->page_offset;
new_buff->pagecnt_bias = old_buff->pagecnt_bias;
-}
-/**
- * i40e_rx_is_programming_status - check for programming status descriptor
- * @qw: qword representing status_error_len in CPU ordering
- *
- * The value of in the descriptor length field indicate if this
- * is a programming status descriptor for flow director or FCoE
- * by the value of I40E_RX_PROG_STATUS_DESC_LENGTH, otherwise
- * it is a packet descriptor.
- **/
-static inline bool i40e_rx_is_programming_status(u64 qw)
-{
- /* The Rx filter programming status and SPH bit occupy the same
- * spot in the descriptor. Since we don't support packet split we
- * can just reuse the bit as an indication that this is a
- * programming status descriptor.
- */
- return qw & I40E_RXD_QW1_LENGTH_SPH_MASK;
+ rx_ring->rx_stats.page_reuse_count++;
+
+ /* clear contents of buffer_info */
+ old_buff->page = NULL;
}
/**
* i40e_clean_programming_status - clean the programming status descriptor
* @rx_ring: the rx ring that has this descriptor
- * @rx_desc: the rx descriptor written back by HW
- * @qw: qword representing status_error_len in CPU ordering
+ * @qword0_raw: qword0
+ * @qword1: qword1 representing status_error_len in CPU ordering
*
* Flow director should handle FD_FILTER_STATUS to check its filter programming
* status being successful or not and take actions accordingly. FCoE should
* handle its context/filter programming/invalidation status and take actions.
*
+ * Returns an i40e_rx_buffer to reuse if the cleanup occurred, otherwise NULL.
**/
-static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
- union i40e_rx_desc *rx_desc,
- u64 qw)
+void i40e_clean_programming_status(struct i40e_ring *rx_ring, u64 qword0_raw,
+ u64 qword1)
{
- struct i40e_rx_buffer *rx_buffer;
- u32 ntc = rx_ring->next_to_clean;
u8 id;
- /* fetch, update, and store next to clean */
- rx_buffer = &rx_ring->rx_bi[ntc++];
- ntc = (ntc < rx_ring->count) ? ntc : 0;
- rx_ring->next_to_clean = ntc;
-
- prefetch(I40E_RX_DESC(rx_ring, ntc));
-
- /* place unused page back on the ring */
- i40e_reuse_rx_page(rx_ring, rx_buffer);
- rx_ring->rx_stats.page_reuse_count++;
-
- /* clear contents of buffer_info */
- rx_buffer->page = NULL;
-
- id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
+ id = (qword1 & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;
if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
- i40e_fd_handle_status(rx_ring, rx_desc, id);
+ i40e_fd_handle_status(rx_ring, qword0_raw, qword1, id);
}
/**
@@ -1354,13 +1305,17 @@
return -ENOMEM;
}
+static void i40e_clear_rx_bi(struct i40e_ring *rx_ring)
+{
+ memset(rx_ring->rx_bi, 0, sizeof(*rx_ring->rx_bi) * rx_ring->count);
+}
+
/**
* i40e_clean_rx_ring - Free Rx buffers
* @rx_ring: ring to be cleaned
**/
void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
{
- unsigned long bi_size;
u16 i;
/* ring already cleared, nothing to do */
@@ -1372,9 +1327,14 @@
rx_ring->skb = NULL;
}
+ if (rx_ring->xsk_pool) {
+ i40e_xsk_clean_rx_ring(rx_ring);
+ goto skip_free;
+ }
+
/* Free all the Rx ring sk_buffs */
for (i = 0; i < rx_ring->count; i++) {
- struct i40e_rx_buffer *rx_bi = &rx_ring->rx_bi[i];
+ struct i40e_rx_buffer *rx_bi = i40e_rx_bi(rx_ring, i);
if (!rx_bi->page)
continue;
@@ -1400,8 +1360,11 @@
rx_bi->page_offset = 0;
}
- bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
- memset(rx_ring->rx_bi, 0, bi_size);
+skip_free:
+ if (rx_ring->xsk_pool)
+ i40e_clear_rx_bi_zc(rx_ring);
+ else
+ i40e_clear_rx_bi(rx_ring);
/* Zero out the descriptor ring */
memset(rx_ring->desc, 0, rx_ring->size);
@@ -1442,20 +1405,12 @@
int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
{
struct device *dev = rx_ring->dev;
- int err = -ENOMEM;
- int bi_size;
-
- /* warn if we are about to overwrite the pointer */
- WARN_ON(rx_ring->rx_bi);
- bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
- rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
- if (!rx_ring->rx_bi)
- goto err;
+ int err;
u64_stats_init(&rx_ring->syncp);
/* Round up to nearest 4K */
- rx_ring->size = rx_ring->count * sizeof(union i40e_32byte_rx_desc);
+ rx_ring->size = rx_ring->count * sizeof(union i40e_rx_desc);
rx_ring->size = ALIGN(rx_ring->size, 4096);
rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
&rx_ring->dma, GFP_KERNEL);
@@ -1463,7 +1418,7 @@
if (!rx_ring->desc) {
dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
rx_ring->size);
- goto err;
+ return -ENOMEM;
}
rx_ring->next_to_alloc = 0;
@@ -1475,16 +1430,17 @@
err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
rx_ring->queue_index);
if (err < 0)
- goto err;
+ return err;
}
rx_ring->xdp_prog = rx_ring->vsi->xdp_prog;
+ rx_ring->rx_bi =
+ kcalloc(rx_ring->count, sizeof(*rx_ring->rx_bi), GFP_KERNEL);
+ if (!rx_ring->rx_bi)
+ return -ENOMEM;
+
return 0;
-err:
- kfree(rx_ring->rx_bi);
- rx_ring->rx_bi = NULL;
- return err;
}
/**
@@ -1492,7 +1448,7 @@
* @rx_ring: ring to bump
* @val: new head index
**/
-static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
+void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
{
rx_ring->next_to_use = val;
@@ -1517,6 +1473,22 @@
static inline unsigned int i40e_rx_offset(struct i40e_ring *rx_ring)
{
return ring_uses_build_skb(rx_ring) ? I40E_SKB_PAD : 0;
+}
+
+static unsigned int i40e_rx_frame_truesize(struct i40e_ring *rx_ring,
+ unsigned int size)
+{
+ unsigned int truesize;
+
+#if (PAGE_SIZE < 8192)
+ truesize = i40e_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */
+#else
+ truesize = i40e_rx_offset(rx_ring) ?
+ SKB_DATA_ALIGN(size + i40e_rx_offset(rx_ring)) +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
+ SKB_DATA_ALIGN(size);
+#endif
+ return truesize;
}
/**
@@ -1571,24 +1543,6 @@
}
/**
- * i40e_receive_skb - Send a completed packet up the stack
- * @rx_ring: rx ring in play
- * @skb: packet to send up
- * @vlan_tag: vlan tag for packet
- **/
-static void i40e_receive_skb(struct i40e_ring *rx_ring,
- struct sk_buff *skb, u16 vlan_tag)
-{
- struct i40e_q_vector *q_vector = rx_ring->q_vector;
-
- if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
- (vlan_tag & VLAN_VID_MASK))
- __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
-
- napi_gro_receive(&q_vector->napi, skb);
-}
-
-/**
* i40e_alloc_rx_buffers - Replace used receive buffers
* @rx_ring: ring to place buffers on
* @cleaned_count: number of buffers to replace
@@ -1606,7 +1560,7 @@
return false;
rx_desc = I40E_RX_DESC(rx_ring, ntu);
- bi = &rx_ring->rx_bi[ntu];
+ bi = i40e_rx_bi(rx_ring, ntu);
do {
if (!i40e_alloc_mapped_page(rx_ring, bi))
@@ -1628,7 +1582,7 @@
ntu++;
if (unlikely(ntu == rx_ring->count)) {
rx_desc = I40E_RX_DESC(rx_ring, 0);
- bi = rx_ring->rx_bi;
+ bi = i40e_rx_bi(rx_ring, 0);
ntu = 0;
}
@@ -1733,7 +1687,7 @@
case I40E_RX_PTYPE_INNER_PROT_UDP:
case I40E_RX_PTYPE_INNER_PROT_SCTP:
skb->ip_summed = CHECKSUM_UNNECESSARY;
- /* fall though */
+ fallthrough;
default:
break;
}
@@ -1798,16 +1752,13 @@
* @rx_ring: rx descriptor ring packet is being transacted on
* @rx_desc: pointer to the EOP Rx descriptor
* @skb: pointer to current skb being populated
- * @rx_ptype: the packet type decoded by hardware
*
* This function checks the ring, descriptor, and packet information in
* order to populate the hash, checksum, VLAN, protocol, and
* other fields within the skb.
**/
-static inline
void i40e_process_skb_fields(struct i40e_ring *rx_ring,
- union i40e_rx_desc *rx_desc, struct sk_buff *skb,
- u8 rx_ptype)
+ union i40e_rx_desc *rx_desc, struct sk_buff *skb)
{
u64 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
u32 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
@@ -1815,6 +1766,8 @@
u32 tsynvalid = rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK;
u32 tsyn = (rx_status & I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT;
+ u8 rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
+ I40E_RXD_QW1_PTYPE_SHIFT;
if (unlikely(tsynvalid))
i40e_ptp_rx_hwtstamp(rx_ring->vsi->back, skb, tsyn);
@@ -1824,6 +1777,13 @@
i40e_rx_checksum(rx_ring->vsi, skb, rx_desc);
skb_record_rx_queue(skb, rx_ring->queue_index);
+
+ if (qword & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) {
+ __le16 vlan_tag = rx_desc->wb.qword0.lo_dword.l2tag1;
+
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+ le16_to_cpu(vlan_tag));
+ }
/* modifies the skb - consumes the enet header */
skb->protocol = eth_type_trans(skb, rx_ring->netdev);
@@ -1847,10 +1807,6 @@
union i40e_rx_desc *rx_desc)
{
- /* XDP packets use error pointer so abort at this point */
- if (IS_ERR(skb))
- return true;
-
/* ERR_MASK will only have valid bits if EOP set, and
* what we are doing here is actually checking
* I40E_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in
@@ -1887,6 +1843,7 @@
* the adapter for another receive
*
* @rx_buffer: buffer containing the page
+ * @rx_buffer_pgcnt: buffer page refcount pre xdp_do_redirect() call
*
* If page is reusable, rx_buffer->page_offset is adjusted to point to
* an unused region in the page.
@@ -1909,7 +1866,8 @@
*
* In either case, if the page is reusable its refcount is increased.
**/
-static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer)
+static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer,
+ int rx_buffer_pgcnt)
{
unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
struct page *page = rx_buffer->page;
@@ -1920,7 +1878,7 @@
#if (PAGE_SIZE < 8192)
/* if we are only owner of page we can reuse it */
- if (unlikely((page_count(page) - pagecnt_bias) > 1))
+ if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1))
return false;
#else
#define I40E_LAST_OFFSET \
@@ -1979,17 +1937,25 @@
* i40e_get_rx_buffer - Fetch Rx buffer and synchronize data for use
* @rx_ring: rx descriptor ring to transact packets on
* @size: size of buffer to add to skb
+ * @rx_buffer_pgcnt: buffer page refcount
*
* This function will pull an Rx buffer from the ring and synchronize it
* for use by the CPU.
*/
static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring,
- const unsigned int size)
+ const unsigned int size,
+ int *rx_buffer_pgcnt)
{
struct i40e_rx_buffer *rx_buffer;
- rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean];
- prefetchw(rx_buffer->page);
+ rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
+ *rx_buffer_pgcnt =
+#if (PAGE_SIZE < 8192)
+ page_count(rx_buffer->page);
+#else
+ 0;
+#endif
+ prefetch_page_address(rx_buffer->page);
/* we are reusing so sync this buffer for CPU use */
dma_sync_single_range_for_cpu(rx_ring->dev,
@@ -2028,10 +1994,8 @@
struct sk_buff *skb;
/* prefetch first cache line of first page */
- prefetch(xdp->data);
-#if L1_CACHE_BYTES < 128
- prefetch(xdp->data + L1_CACHE_BYTES);
-#endif
+ net_prefetch(xdp->data);
+
/* Note, we get here by enabling legacy-rx via:
*
* ethtool --set-priv-flags <dev> legacy-rx on
@@ -2058,7 +2022,8 @@
/* Determine available headroom for copy */
headlen = size;
if (headlen > I40E_RX_HDR_SIZE)
- headlen = eth_get_headlen(xdp->data, I40E_RX_HDR_SIZE);
+ headlen = eth_get_headlen(skb->dev, xdp->data,
+ I40E_RX_HDR_SIZE);
/* align pull length to size of long to optimize memcpy performance */
memcpy(__skb_put(skb, headlen), xdp->data,
@@ -2113,10 +2078,8 @@
* likely have a consumer accessing first few bytes of meta
* data, and then actual data.
*/
- prefetch(xdp->data_meta);
-#if L1_CACHE_BYTES < 128
- prefetch(xdp->data_meta + L1_CACHE_BYTES);
-#endif
+ net_prefetch(xdp->data_meta);
+
/* build an skb around the page buffer */
skb = build_skb(xdp->data_hard_start, truesize);
if (unlikely(!skb))
@@ -2142,17 +2105,18 @@
* i40e_put_rx_buffer - Clean up used buffer and either recycle or free
* @rx_ring: rx descriptor ring to transact packets on
* @rx_buffer: rx buffer to pull data from
+ * @rx_buffer_pgcnt: rx buffer page refcount pre xdp_do_redirect() call
*
* This function will clean up the contents of the rx_buffer. It will
* either recycle the buffer or unmap it and free the associated resources.
*/
static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
- struct i40e_rx_buffer *rx_buffer)
+ struct i40e_rx_buffer *rx_buffer,
+ int rx_buffer_pgcnt)
{
- if (i40e_can_reuse_rx_page(rx_buffer)) {
+ if (i40e_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) {
/* hand second half of page back to the ring */
i40e_reuse_rx_page(rx_ring, rx_buffer);
- rx_ring->rx_stats.page_reuse_count++;
} else {
/* we are not reusing the buffer so unmap it */
dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
@@ -2160,10 +2124,9 @@
DMA_FROM_DEVICE, I40E_RX_DMA_ATTR);
__page_frag_cache_drain(rx_buffer->page,
rx_buffer->pagecnt_bias);
+ /* clear contents of buffer_info */
+ rx_buffer->page = NULL;
}
-
- /* clear contents of buffer_info */
- rx_buffer->page = NULL;
}
/**
@@ -2199,18 +2162,12 @@
return true;
}
-#define I40E_XDP_PASS 0
-#define I40E_XDP_CONSUMED BIT(0)
-#define I40E_XDP_TX BIT(1)
-#define I40E_XDP_REDIR BIT(2)
-
static int i40e_xmit_xdp_ring(struct xdp_frame *xdpf,
struct i40e_ring *xdp_ring);
-static int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp,
- struct i40e_ring *xdp_ring)
+int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp, struct i40e_ring *xdp_ring)
{
- struct xdp_frame *xdpf = convert_to_xdp_frame(xdp);
+ struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
if (unlikely(!xdpf))
return I40E_XDP_CONSUMED;
@@ -2223,8 +2180,7 @@
* @rx_ring: Rx ring being processed
* @xdp: XDP buffer containing the frame
**/
-static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring,
- struct xdp_buff *xdp)
+static int i40e_run_xdp(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
{
int err, result = I40E_XDP_PASS;
struct i40e_ring *xdp_ring;
@@ -2246,24 +2202,29 @@
case XDP_TX:
xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index];
result = i40e_xmit_xdp_tx_ring(xdp, xdp_ring);
+ if (result == I40E_XDP_CONSUMED)
+ goto out_failure;
break;
case XDP_REDIRECT:
err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
- result = !err ? I40E_XDP_REDIR : I40E_XDP_CONSUMED;
+ if (err)
+ goto out_failure;
+ result = I40E_XDP_REDIR;
break;
default:
bpf_warn_invalid_xdp_action(act);
- /* fall through */
+ fallthrough;
case XDP_ABORTED:
+out_failure:
trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
- /* fall through -- handle aborts by dropping packet */
+ fallthrough; /* handle aborts by dropping packet */
case XDP_DROP:
result = I40E_XDP_CONSUMED;
break;
}
xdp_out:
rcu_read_unlock();
- return ERR_PTR(-result);
+ return result;
}
/**
@@ -2276,24 +2237,83 @@
struct i40e_rx_buffer *rx_buffer,
unsigned int size)
{
-#if (PAGE_SIZE < 8192)
- unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
+ unsigned int truesize = i40e_rx_frame_truesize(rx_ring, size);
+#if (PAGE_SIZE < 8192)
rx_buffer->page_offset ^= truesize;
#else
- unsigned int truesize = SKB_DATA_ALIGN(i40e_rx_offset(rx_ring) + size);
-
rx_buffer->page_offset += truesize;
#endif
}
-static inline void i40e_xdp_ring_update_tail(struct i40e_ring *xdp_ring)
+/**
+ * i40e_xdp_ring_update_tail - Updates the XDP Tx ring tail register
+ * @xdp_ring: XDP Tx ring
+ *
+ * This function updates the XDP Tx ring tail register.
+ **/
+void i40e_xdp_ring_update_tail(struct i40e_ring *xdp_ring)
{
/* Force memory writes to complete before letting h/w
* know there are new descriptors to fetch.
*/
wmb();
writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail);
+}
+
+/**
+ * i40e_update_rx_stats - Update Rx ring statistics
+ * @rx_ring: rx descriptor ring
+ * @total_rx_bytes: number of bytes received
+ * @total_rx_packets: number of packets received
+ *
+ * This function updates the Rx ring statistics.
+ **/
+void i40e_update_rx_stats(struct i40e_ring *rx_ring,
+ unsigned int total_rx_bytes,
+ unsigned int total_rx_packets)
+{
+ u64_stats_update_begin(&rx_ring->syncp);
+ rx_ring->stats.packets += total_rx_packets;
+ rx_ring->stats.bytes += total_rx_bytes;
+ u64_stats_update_end(&rx_ring->syncp);
+ rx_ring->q_vector->rx.total_packets += total_rx_packets;
+ rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
+}
+
+/**
+ * i40e_finalize_xdp_rx - Bump XDP Tx tail and/or flush redirect map
+ * @rx_ring: Rx ring
+ * @xdp_res: Result of the receive batch
+ *
+ * This function bumps XDP Tx tail and/or flush redirect map, and
+ * should be called when a batch of packets has been processed in the
+ * napi loop.
+ **/
+void i40e_finalize_xdp_rx(struct i40e_ring *rx_ring, unsigned int xdp_res)
+{
+ if (xdp_res & I40E_XDP_REDIR)
+ xdp_do_flush_map();
+
+ if (xdp_res & I40E_XDP_TX) {
+ struct i40e_ring *xdp_ring =
+ rx_ring->vsi->xdp_rings[rx_ring->queue_index];
+
+ i40e_xdp_ring_update_tail(xdp_ring);
+ }
+}
+
+/**
+ * i40e_inc_ntc: Advance the next_to_clean index
+ * @rx_ring: Rx ring
+ **/
+static void i40e_inc_ntc(struct i40e_ring *rx_ring)
+{
+ u32 ntc = rx_ring->next_to_clean + 1;
+
+ ntc = (ntc < rx_ring->count) ? ntc : 0;
+ rx_ring->next_to_clean = ntc;
+ prefetch(I40E_RX_DESC(rx_ring, ntc));
}
/**
@@ -2316,15 +2336,18 @@
unsigned int xdp_xmit = 0;
bool failure = false;
struct xdp_buff xdp;
+ int xdp_res = 0;
+#if (PAGE_SIZE < 8192)
+ xdp.frame_sz = i40e_rx_frame_truesize(rx_ring, 0);
+#endif
xdp.rxq = &rx_ring->xdp_rxq;
while (likely(total_rx_packets < (unsigned int)budget)) {
struct i40e_rx_buffer *rx_buffer;
union i40e_rx_desc *rx_desc;
+ int rx_buffer_pgcnt;
unsigned int size;
- u16 vlan_tag;
- u8 rx_ptype;
u64 qword;
/* return some buffers to hardware, one at a time is too slow */
@@ -2349,18 +2372,24 @@
*/
dma_rmb();
- if (unlikely(i40e_rx_is_programming_status(qword))) {
- i40e_clean_programming_status(rx_ring, rx_desc, qword);
+ if (i40e_rx_is_programming_status(qword)) {
+ i40e_clean_programming_status(rx_ring,
+ rx_desc->raw.qword[0],
+ qword);
+ rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
+ i40e_inc_ntc(rx_ring);
+ i40e_reuse_rx_page(rx_ring, rx_buffer);
cleaned_count++;
continue;
}
+
size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
if (!size)
break;
i40e_trace(clean_rx_irq, rx_ring, rx_desc, skb);
- rx_buffer = i40e_get_rx_buffer(rx_ring, size);
+ rx_buffer = i40e_get_rx_buffer(rx_ring, size, &rx_buffer_pgcnt);
/* retrieve a buffer from the ring */
if (!skb) {
@@ -2370,13 +2399,14 @@
xdp.data_hard_start = xdp.data -
i40e_rx_offset(rx_ring);
xdp.data_end = xdp.data + size;
-
- skb = i40e_run_xdp(rx_ring, &xdp);
+#if (PAGE_SIZE > 4096)
+ /* At larger PAGE_SIZE, frame_sz depend on len size */
+ xdp.frame_sz = i40e_rx_frame_truesize(rx_ring, size);
+#endif
+ xdp_res = i40e_run_xdp(rx_ring, &xdp);
}
- if (IS_ERR(skb)) {
- unsigned int xdp_res = -PTR_ERR(skb);
-
+ if (xdp_res) {
if (xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR)) {
xdp_xmit |= xdp_res;
i40e_rx_buffer_flip(rx_ring, rx_buffer, size);
@@ -2394,19 +2424,19 @@
}
/* exit if we failed to retrieve a buffer */
- if (!skb) {
+ if (!xdp_res && !skb) {
rx_ring->rx_stats.alloc_buff_failed++;
rx_buffer->pagecnt_bias++;
break;
}
- i40e_put_rx_buffer(rx_ring, rx_buffer);
+ i40e_put_rx_buffer(rx_ring, rx_buffer, rx_buffer_pgcnt);
cleaned_count++;
if (i40e_is_non_eop(rx_ring, rx_desc, skb))
continue;
- if (i40e_cleanup_headers(rx_ring, skb, rx_desc)) {
+ if (xdp_res || i40e_cleanup_headers(rx_ring, skb, rx_desc)) {
skb = NULL;
continue;
}
@@ -2414,42 +2444,21 @@
/* probably a little skewed due to removing CRC */
total_rx_bytes += skb->len;
- qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
- rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
- I40E_RXD_QW1_PTYPE_SHIFT;
-
/* populate checksum, VLAN, and protocol */
- i40e_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
-
- vlan_tag = (qword & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) ?
- le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) : 0;
+ i40e_process_skb_fields(rx_ring, rx_desc, skb);
i40e_trace(clean_rx_irq_rx, rx_ring, rx_desc, skb);
- i40e_receive_skb(rx_ring, skb, vlan_tag);
+ napi_gro_receive(&rx_ring->q_vector->napi, skb);
skb = NULL;
/* update budget accounting */
total_rx_packets++;
}
- if (xdp_xmit & I40E_XDP_REDIR)
- xdp_do_flush_map();
-
- if (xdp_xmit & I40E_XDP_TX) {
- struct i40e_ring *xdp_ring =
- rx_ring->vsi->xdp_rings[rx_ring->queue_index];
-
- i40e_xdp_ring_update_tail(xdp_ring);
- }
-
+ i40e_finalize_xdp_rx(rx_ring, xdp_xmit);
rx_ring->skb = skb;
- u64_stats_update_begin(&rx_ring->syncp);
- rx_ring->stats.packets += total_rx_packets;
- rx_ring->stats.bytes += total_rx_bytes;
- u64_stats_update_end(&rx_ring->syncp);
- rx_ring->q_vector->rx.total_packets += total_rx_packets;
- rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
+ i40e_update_rx_stats(rx_ring, total_rx_bytes, total_rx_packets);
/* guarantee a trip back through this routine if there was a failure */
return failure ? budget : (int)total_rx_packets;
@@ -2587,7 +2596,11 @@
* budget and be more aggressive about cleaning up the Tx descriptors.
*/
i40e_for_each_ring(ring, q_vector->tx) {
- if (!i40e_clean_tx_irq(vsi, ring, budget)) {
+ bool wd = ring->xsk_pool ?
+ i40e_clean_xdp_tx_irq(vsi, ring) :
+ i40e_clean_tx_irq(vsi, ring, budget);
+
+ if (!wd) {
clean_complete = false;
continue;
}
@@ -2599,13 +2612,21 @@
if (budget <= 0)
goto tx_only;
- /* We attempt to distribute budget to each Rx queue fairly, but don't
- * allow the budget to go below 1 because that would exit polling early.
- */
- budget_per_ring = max(budget/q_vector->num_ringpairs, 1);
+ /* normally we have 1 Rx ring per q_vector */
+ if (unlikely(q_vector->num_ringpairs > 1))
+ /* We attempt to distribute budget to each Rx queue fairly, but
+ * don't allow the budget to go below 1 because that would exit
+ * polling early.
+ */
+ budget_per_ring = max_t(int, budget / q_vector->num_ringpairs, 1);
+ else
+ /* Max of 1 Rx ring in this q_vector so give it the budget */
+ budget_per_ring = budget;
i40e_for_each_ring(ring, q_vector->rx) {
- int cleaned = i40e_clean_rx_irq(ring, budget_per_ring);
+ int cleaned = ring->xsk_pool ?
+ i40e_clean_rx_irq_zc(ring, budget_per_ring) :
+ i40e_clean_rx_irq(ring, budget_per_ring);
work_done += cleaned;
/* if we clean as many as budgeted, we must not be done */
@@ -2645,10 +2666,11 @@
if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR)
q_vector->arm_wb_state = false;
- /* Work is done so exit the polling mode and re-enable the interrupt */
- napi_complete_done(napi, work_done);
-
- i40e_update_enable_itr(vsi, q_vector);
+ /* Exit the polling mode, but don't re-enable interrupts if stack might
+ * poll us due to busy-polling
+ */
+ if (likely(napi_complete_done(napi, work_done)))
+ i40e_update_enable_itr(vsi, q_vector);
return min(work_done, budget - 1);
}
@@ -2955,10 +2977,16 @@
/* remove payload length from inner checksum */
paylen = skb->len - l4_offset;
- csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen));
- /* compute length of segmentation header */
- *hdr_len = (l4.tcp->doff * 4) + l4_offset;
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
+ csum_replace_by_diff(&l4.udp->check, (__force __wsum)htonl(paylen));
+ /* compute length of segmentation header */
+ *hdr_len = sizeof(*l4.udp) + l4_offset;
+ } else {
+ csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen));
+ /* compute length of segmentation header */
+ *hdr_len = (l4.tcp->doff * 4) + l4_offset;
+ }
/* pull values out of skb_shinfo */
gso_size = skb_shinfo(skb)->gso_size;
@@ -3260,7 +3288,7 @@
**/
bool __i40e_chk_linearize(struct sk_buff *skb)
{
- const struct skb_frag_struct *frag, *stale;
+ const skb_frag_t *frag, *stale;
int nr_frags, sum;
/* no need to check if number of frags is less than 7 */
@@ -3304,7 +3332,7 @@
* descriptor associated with the fragment.
*/
if (stale_size > I40E_MAX_DATA_PER_TXD) {
- int align_pad = -(stale->page_offset) &
+ int align_pad = -(skb_frag_off(stale)) &
(I40E_MAX_READ_REQ_SIZE - 1);
sum -= align_pad;
@@ -3347,7 +3375,7 @@
{
unsigned int data_len = skb->data_len;
unsigned int size = skb_headlen(skb);
- struct skb_frag_struct *frag;
+ skb_frag_t *frag;
struct i40e_tx_buffer *tx_bi;
struct i40e_tx_desc *tx_desc;
u16 i = tx_ring->next_to_use;
@@ -3454,6 +3482,8 @@
tx_desc->cmd_type_offset_bsz =
build_ctob(td_cmd, td_offset, size, td_tag);
+ skb_tx_timestamp(skb);
+
/* Force memory writes to complete before letting h/w know there
* are new descriptors to fetch.
*
@@ -3466,13 +3496,8 @@
first->next_to_watch = tx_desc;
/* notify HW of packet */
- if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) {
+ if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) {
writel(i, tx_ring->tail);
-
- /* we need this if more than one processor can write to our tail
- * at a time, it synchronizes IO on IA64/Altix systems
- */
- mmiowb();
}
return 0;
@@ -3496,9 +3521,58 @@
return -1;
}
+static u16 i40e_swdcb_skb_tx_hash(struct net_device *dev,
+ const struct sk_buff *skb,
+ u16 num_tx_queues)
+{
+ u32 jhash_initval_salt = 0xd631614b;
+ u32 hash;
+
+ if (skb->sk && skb->sk->sk_hash)
+ hash = skb->sk->sk_hash;
+ else
+ hash = (__force u16)skb->protocol ^ skb->hash;
+
+ hash = jhash_1word(hash, jhash_initval_salt);
+
+ return (u16)(((u64)hash * num_tx_queues) >> 32);
+}
+
+u16 i40e_lan_select_queue(struct net_device *netdev,
+ struct sk_buff *skb,
+ struct net_device __always_unused *sb_dev)
+{
+ struct i40e_netdev_priv *np = netdev_priv(netdev);
+ struct i40e_vsi *vsi = np->vsi;
+ struct i40e_hw *hw;
+ u16 qoffset;
+ u16 qcount;
+ u8 tclass;
+ u16 hash;
+ u8 prio;
+
+ /* is DCB enabled at all? */
+ if (vsi->tc_config.numtc == 1)
+ return netdev_pick_tx(netdev, skb, sb_dev);
+
+ prio = skb->priority;
+ hw = &vsi->back->hw;
+ tclass = hw->local_dcbx_config.etscfg.prioritytable[prio];
+ /* sanity check */
+ if (unlikely(!(vsi->tc_config.enabled_tc & BIT(tclass))))
+ tclass = 0;
+
+ /* select a queue assigned for the given TC */
+ qcount = vsi->tc_config.tc_info[tclass].qcount;
+ hash = i40e_swdcb_skb_tx_hash(netdev, skb, qcount);
+
+ qoffset = vsi->tc_config.tc_info[tclass].qoffset;
+ return qoffset + hash;
+}
+
/**
* i40e_xmit_xdp_ring - transmits an XDP buffer to an XDP Tx ring
- * @xdp: data to transmit
+ * @xdpf: data to transmit
* @xdp_ring: XDP Tx ring
**/
static int i40e_xmit_xdp_ring(struct xdp_frame *xdpf,
@@ -3507,6 +3581,7 @@
u16 i = xdp_ring->next_to_use;
struct i40e_tx_buffer *tx_bi;
struct i40e_tx_desc *tx_desc;
+ void *data = xdpf->data;
u32 size = xdpf->len;
dma_addr_t dma;
@@ -3514,8 +3589,7 @@
xdp_ring->tx_stats.tx_busy++;
return I40E_XDP_CONSUMED;
}
-
- dma = dma_map_single(xdp_ring->dev, xdpf->data, size, DMA_TO_DEVICE);
+ dma = dma_map_single(xdp_ring->dev, data, size, DMA_TO_DEVICE);
if (dma_mapping_error(xdp_ring->dev, dma))
return I40E_XDP_CONSUMED;
@@ -3539,6 +3613,7 @@
*/
smp_wmb();
+ xdp_ring->xdp_tx_active++;
i++;
if (i == xdp_ring->count)
i = 0;
@@ -3633,8 +3708,6 @@
if (tsyn)
tx_flags |= I40E_TX_FLAGS_TSYN;
- skb_tx_timestamp(skb);
-
/* always enable CRC insertion offload */
td_cmd |= I40E_TX_DESC_CMD_ICRC;
@@ -3694,7 +3767,9 @@
/**
* i40e_xdp_xmit - Implements ndo_xdp_xmit
* @dev: netdev
- * @xdp: XDP buffer
+ * @n: number of frames
+ * @frames: array of XDP buffer pointers
+ * @flags: XDP extra info
*
* Returns number of frames successfully sent. Frames that fail are
* free'ed via XDP return API.
@@ -3708,6 +3783,7 @@
struct i40e_netdev_priv *np = netdev_priv(dev);
unsigned int queue_index = smp_processor_id();
struct i40e_vsi *vsi = np->vsi;
+ struct i40e_pf *pf = vsi->back;
struct i40e_ring *xdp_ring;
int drops = 0;
int i;
@@ -3715,7 +3791,8 @@
if (test_bit(__I40E_VSI_DOWN, vsi->state))
return -ENETDOWN;
- if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs)
+ if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs ||
+ test_bit(__I40E_CONFIG_BUSY, pf->state))
return -ENXIO;
if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
--
Gitblit v1.6.2