From 37f49e37ab4cb5d0bc4c60eb5c6d4dd57db767bb Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Fri, 10 May 2024 07:44:59 +0000
Subject: [PATCH] gmac: get MAC address from EEPROM
---
kernel/drivers/net/ethernet/intel/igb/igb_main.c | 913 +++++++++++++++++++++++++++++++++++++++++---------------
1 file changed, 659 insertions(+), 254 deletions(-)
diff --git a/kernel/drivers/net/ethernet/intel/igb/igb_main.c b/kernel/drivers/net/ethernet/intel/igb/igb_main.c
index 74b50f1..01176c8 100644
--- a/kernel/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/kernel/drivers/net/ethernet/intel/igb/igb_main.c
@@ -30,6 +30,8 @@
#include <linux/if_ether.h>
#include <linux/aer.h>
#include <linux/prefetch.h>
+#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
#include <linux/pm_runtime.h>
#include <linux/etherdevice.h>
#ifdef CONFIG_IGB_DCA
@@ -37,12 +39,6 @@
#endif
#include <linux/i2c.h>
#include "igb.h"
-
-#define MAJ 5
-#define MIN 4
-#define BUILD 0
-#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
-__stringify(BUILD) "-k"
enum queue_mode {
QUEUE_MODE_STRICT_PRIORITY,
@@ -55,7 +51,6 @@
};
char igb_driver_name[] = "igb";
-char igb_driver_version[] = DRV_VERSION;
static const char igb_driver_string[] =
"Intel(R) Gigabit Ethernet Network Driver";
static const char igb_copyright[] =
@@ -146,7 +141,7 @@
static bool igb_clean_tx_irq(struct igb_q_vector *, int);
static int igb_clean_rx_irq(struct igb_q_vector *, int);
static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
-static void igb_tx_timeout(struct net_device *);
+static void igb_tx_timeout(struct net_device *, unsigned int txqueue);
static void igb_reset_task(struct work_struct *);
static void igb_vlan_mode(struct net_device *netdev,
netdev_features_t features);
@@ -239,8 +234,7 @@
MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
-MODULE_LICENSE("GPL");
-MODULE_VERSION(DRV_VERSION);
+MODULE_LICENSE("GPL v2");
#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
static int debug = -1;
@@ -557,8 +551,7 @@
/**
* igb_get_i2c_data - Reads the I2C SDA data bit
- * @hw: pointer to hardware structure
- * @i2cctl: Current value of I2CCTL register
+ * @data: opaque pointer to adapter struct
*
* Returns the I2C data bit value
**/
@@ -666,8 +659,7 @@
{
int ret;
- pr_info("%s - version %s\n",
- igb_driver_string, igb_driver_version);
+ pr_info("%s\n", igb_driver_string);
pr_info("%s\n", igb_copyright);
#ifdef CONFIG_IGB_DCA
@@ -720,14 +712,13 @@
adapter->rx_ring[i]->reg_idx = rbase_offset +
Q_IDX_82576(i);
}
- /* Fall through */
+ fallthrough;
case e1000_82575:
case e1000_82580:
case e1000_i350:
case e1000_i354:
case e1000_i210:
case e1000_i211:
- /* Fall through */
default:
for (; i < adapter->num_rx_queues; i++)
adapter->rx_ring[i]->reg_idx = rbase_offset + i;
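[Editor's note: the hunk above converts the old "/* Fall through */" comments to the fallthrough pseudo-keyword used throughout this patch. A generic, illustrative sketch (not driver code) of how it is used in kernel C:

#include <linux/compiler_attributes.h>	/* defines the fallthrough macro */

static int demo_classify(int type)
{
	int weight = 0;

	switch (type) {
	case 2:
		weight += 10;
		fallthrough;	/* deliberate: type 2 also gets the base weight */
	case 1:
		weight += 1;
		break;
	default:
		weight = -1;
		break;
	}
	return weight;
}

Unlike a comment, fallthrough is visible to the compiler, so -Wimplicit-fallthrough can flag any switch case that falls through unintentionally.]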
@@ -753,6 +744,8 @@
struct net_device *netdev = igb->netdev;
hw->hw_addr = NULL;
netdev_err(netdev, "PCIe link lost\n");
+ WARN(pci_device_is_present(igb->pdev),
+ "igb: Failed to read reg 0x%x!\n", reg);
}
return value;
@@ -1196,23 +1189,27 @@
{
struct igb_q_vector *q_vector;
struct igb_ring *ring;
- int ring_count, size;
+ int ring_count;
+ size_t size;
/* igb only supports 1 Tx and/or 1 Rx queue per vector */
if (txr_count > 1 || rxr_count > 1)
return -ENOMEM;
ring_count = txr_count + rxr_count;
- size = sizeof(struct igb_q_vector) +
- (sizeof(struct igb_ring) * ring_count);
+ size = struct_size(q_vector, ring, ring_count);
/* allocate q_vector and rings */
q_vector = adapter->q_vector[v_idx];
if (!q_vector) {
q_vector = kzalloc(size, GFP_KERNEL);
} else if (size > ksize(q_vector)) {
- kfree_rcu(q_vector, rcu);
- q_vector = kzalloc(size, GFP_KERNEL);
+ struct igb_q_vector *new_q_vector;
+
+ new_q_vector = kzalloc(size, GFP_KERNEL);
+ if (new_q_vector)
+ kfree_rcu(q_vector, rcu);
+ q_vector = new_q_vector;
} else {
memset(q_vector, 0, size);
}
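[Editor's note: the hunk above replaces open-coded size arithmetic with struct_size(). A minimal kernel-style sketch of that helper, using hypothetical struct and function names that are not part of this patch:

#include <linux/overflow.h>	/* struct_size() */
#include <linux/slab.h>		/* kzalloc(), GFP_KERNEL */

/* Hypothetical container ending in a flexible array member. */
struct demo_vector {
	int count;
	struct demo_ring {
		void *desc;
	} ring[];
};

static struct demo_vector *demo_vector_alloc(int ring_count)
{
	/*
	 * struct_size(v, ring, ring_count) evaluates to
	 * sizeof(struct demo_vector) + ring_count * sizeof(v->ring[0])
	 * with overflow checking, replacing the removed open-coded sum.
	 */
	struct demo_vector *v = kzalloc(struct_size(v, ring, ring_count),
					GFP_KERNEL);

	if (v)
		v->count = ring_count;
	return v;
}
]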
@@ -1858,13 +1855,12 @@
* configuration' in respect to these parameters.
*/
- netdev_dbg(netdev, "Qav Tx mode: cbs %s, launchtime %s, queue %d \
- idleslope %d sendslope %d hiCredit %d \
- locredit %d\n",
- (ring->cbs_enable) ? "enabled" : "disabled",
- (ring->launchtime_enable) ? "enabled" : "disabled", queue,
- ring->idleslope, ring->sendslope, ring->hicredit,
- ring->locredit);
+ netdev_dbg(netdev, "Qav Tx mode: cbs %s, launchtime %s, queue %d idleslope %d sendslope %d hiCredit %d locredit %d\n",
+ ring->cbs_enable ? "enabled" : "disabled",
+ ring->launchtime_enable ? "enabled" : "disabled",
+ queue,
+ ring->idleslope, ring->sendslope,
+ ring->hicredit, ring->locredit);
}
static int igb_save_txtime_params(struct igb_adapter *adapter, int queue,
@@ -1943,7 +1939,7 @@
val = rd32(E1000_RXPBS);
val &= ~I210_RXPBSIZE_MASK;
- val |= I210_RXPBSIZE_PB_32KB;
+ val |= I210_RXPBSIZE_PB_30KB;
wr32(E1000_RXPBS, val);
/* Section 8.12.9 states that MAX_TPKT_SIZE from DTXMXPKTSZ
@@ -2237,7 +2233,6 @@
void igb_reinit_locked(struct igb_adapter *adapter)
{
- WARN_ON(in_interrupt());
while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
usleep_range(1000, 2000);
igb_down(adapter);
@@ -2379,7 +2374,7 @@
adapter->ei.get_invariants(hw);
adapter->flags &= ~IGB_FLAG_MEDIA_RESET;
}
- if ((mac->type == e1000_82575) &&
+ if ((mac->type == e1000_82575 || mac->type == e1000_i350) &&
(adapter->flags & IGB_FLAG_MAS_ENABLE)) {
igb_enable_mas(adapter);
}
@@ -2490,13 +2485,14 @@
else
igb_reset(adapter);
- return 0;
+ return 1;
}
static int igb_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev,
const unsigned char *addr, u16 vid,
- u16 flags)
+ u16 flags,
+ struct netlink_ext_ack *extack)
{
/* guarantee we can provide a unique filter for the unicast address */
if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) {
@@ -2524,6 +2520,7 @@
if (unlikely(mac_hdr_len > IGB_MAX_MAC_HDR_LEN))
return features & ~(NETIF_F_HW_CSUM |
NETIF_F_SCTP_CRC |
+ NETIF_F_GSO_UDP_L4 |
NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_TSO |
NETIF_F_TSO6);
@@ -2532,6 +2529,7 @@
if (unlikely(network_hdr_len > IGB_MAX_NETWORK_HDR_LEN))
return features & ~(NETIF_F_HW_CSUM |
NETIF_F_SCTP_CRC |
+ NETIF_F_GSO_UDP_L4 |
NETIF_F_TSO |
NETIF_F_TSO6);
@@ -2586,13 +2584,15 @@
#define VLAN_PRIO_FULL_MASK (0x07)
static int igb_parse_cls_flower(struct igb_adapter *adapter,
- struct tc_cls_flower_offload *f,
+ struct flow_cls_offload *f,
int traffic_class,
struct igb_nfc_filter *input)
{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct flow_dissector *dissector = rule->match.dissector;
struct netlink_ext_ack *extack = f->common.extack;
- if (f->dissector->used_keys &
+ if (dissector->used_keys &
~(BIT(FLOW_DISSECTOR_KEY_BASIC) |
BIT(FLOW_DISSECTOR_KEY_CONTROL) |
BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
@@ -2602,78 +2602,61 @@
return -EOPNOTSUPP;
}
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
- struct flow_dissector_key_eth_addrs *key, *mask;
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+ struct flow_match_eth_addrs match;
- key = skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_ETH_ADDRS,
- f->key);
- mask = skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_ETH_ADDRS,
- f->mask);
-
- if (!is_zero_ether_addr(mask->dst)) {
- if (!is_broadcast_ether_addr(mask->dst)) {
+ flow_rule_match_eth_addrs(rule, &match);
+ if (!is_zero_ether_addr(match.mask->dst)) {
+ if (!is_broadcast_ether_addr(match.mask->dst)) {
NL_SET_ERR_MSG_MOD(extack, "Only full masks are supported for destination MAC address");
return -EINVAL;
}
input->filter.match_flags |=
IGB_FILTER_FLAG_DST_MAC_ADDR;
- ether_addr_copy(input->filter.dst_addr, key->dst);
+ ether_addr_copy(input->filter.dst_addr, match.key->dst);
}
- if (!is_zero_ether_addr(mask->src)) {
- if (!is_broadcast_ether_addr(mask->src)) {
+ if (!is_zero_ether_addr(match.mask->src)) {
+ if (!is_broadcast_ether_addr(match.mask->src)) {
NL_SET_ERR_MSG_MOD(extack, "Only full masks are supported for source MAC address");
return -EINVAL;
}
input->filter.match_flags |=
IGB_FILTER_FLAG_SRC_MAC_ADDR;
- ether_addr_copy(input->filter.src_addr, key->src);
+ ether_addr_copy(input->filter.src_addr, match.key->src);
}
}
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
- struct flow_dissector_key_basic *key, *mask;
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+ struct flow_match_basic match;
- key = skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_BASIC,
- f->key);
- mask = skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_BASIC,
- f->mask);
-
- if (mask->n_proto) {
- if (mask->n_proto != ETHER_TYPE_FULL_MASK) {
+ flow_rule_match_basic(rule, &match);
+ if (match.mask->n_proto) {
+ if (match.mask->n_proto != ETHER_TYPE_FULL_MASK) {
NL_SET_ERR_MSG_MOD(extack, "Only full mask is supported for EtherType filter");
return -EINVAL;
}
input->filter.match_flags |= IGB_FILTER_FLAG_ETHER_TYPE;
- input->filter.etype = key->n_proto;
+ input->filter.etype = match.key->n_proto;
}
}
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
- struct flow_dissector_key_vlan *key, *mask;
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+ struct flow_match_vlan match;
- key = skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_VLAN,
- f->key);
- mask = skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_VLAN,
- f->mask);
-
- if (mask->vlan_priority) {
- if (mask->vlan_priority != VLAN_PRIO_FULL_MASK) {
+ flow_rule_match_vlan(rule, &match);
+ if (match.mask->vlan_priority) {
+ if (match.mask->vlan_priority != VLAN_PRIO_FULL_MASK) {
NL_SET_ERR_MSG_MOD(extack, "Only full mask is supported for VLAN priority");
return -EINVAL;
}
input->filter.match_flags |= IGB_FILTER_FLAG_VLAN_TCI;
- input->filter.vlan_tci = key->vlan_priority;
+ input->filter.vlan_tci =
+ (__force __be16)match.key->vlan_priority;
}
}
@@ -2684,7 +2667,7 @@
}
static int igb_configure_clsflower(struct igb_adapter *adapter,
- struct tc_cls_flower_offload *cls_flower)
+ struct flow_cls_offload *cls_flower)
{
struct netlink_ext_ack *extack = cls_flower->common.extack;
struct igb_nfc_filter *filter, *f;
@@ -2746,7 +2729,7 @@
}
static int igb_delete_clsflower(struct igb_adapter *adapter,
- struct tc_cls_flower_offload *cls_flower)
+ struct flow_cls_offload *cls_flower)
{
struct igb_nfc_filter *filter;
int err;
@@ -2776,14 +2759,14 @@
}
static int igb_setup_tc_cls_flower(struct igb_adapter *adapter,
- struct tc_cls_flower_offload *cls_flower)
+ struct flow_cls_offload *cls_flower)
{
switch (cls_flower->command) {
- case TC_CLSFLOWER_REPLACE:
+ case FLOW_CLS_REPLACE:
return igb_configure_clsflower(adapter, cls_flower);
- case TC_CLSFLOWER_DESTROY:
+ case FLOW_CLS_DESTROY:
return igb_delete_clsflower(adapter, cls_flower);
- case TC_CLSFLOWER_STATS:
+ case FLOW_CLS_STATS:
return -EOPNOTSUPP;
default:
return -EOPNOTSUPP;
@@ -2802,25 +2785,6 @@
case TC_SETUP_CLSFLOWER:
return igb_setup_tc_cls_flower(adapter, type_data);
- default:
- return -EOPNOTSUPP;
- }
-}
-
-static int igb_setup_tc_block(struct igb_adapter *adapter,
- struct tc_block_offload *f)
-{
- if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
- return -EOPNOTSUPP;
-
- switch (f->command) {
- case TC_BLOCK_BIND:
- return tcf_block_cb_register(f->block, igb_setup_tc_block_cb,
- adapter, adapter, f->extack);
- case TC_BLOCK_UNBIND:
- tcf_block_cb_unregister(f->block, igb_setup_tc_block_cb,
- adapter);
- return 0;
default:
return -EOPNOTSUPP;
}
@@ -2849,6 +2813,8 @@
return 0;
}
+static LIST_HEAD(igb_block_cb_list);
+
static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type,
void *type_data)
{
@@ -2858,13 +2824,168 @@
case TC_SETUP_QDISC_CBS:
return igb_offload_cbs(adapter, type_data);
case TC_SETUP_BLOCK:
- return igb_setup_tc_block(adapter, type_data);
+ return flow_block_cb_setup_simple(type_data,
+ &igb_block_cb_list,
+ igb_setup_tc_block_cb,
+ adapter, adapter, true);
+
case TC_SETUP_QDISC_ETF:
return igb_offload_txtime(adapter, type_data);
default:
return -EOPNOTSUPP;
}
+}
+
+static int igb_xdp_setup(struct net_device *dev, struct netdev_bpf *bpf)
+{
+ int i, frame_size = dev->mtu + IGB_ETH_PKT_HDR_PAD;
+ struct igb_adapter *adapter = netdev_priv(dev);
+ struct bpf_prog *prog = bpf->prog, *old_prog;
+ bool running = netif_running(dev);
+ bool need_reset;
+
+ /* verify igb ring attributes are sufficient for XDP */
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ struct igb_ring *ring = adapter->rx_ring[i];
+
+ if (frame_size > igb_rx_bufsz(ring)) {
+ NL_SET_ERR_MSG_MOD(bpf->extack,
+ "The RX buffer size is too small for the frame size");
+ netdev_warn(dev, "XDP RX buffer size %d is too small for the frame size %d\n",
+ igb_rx_bufsz(ring), frame_size);
+ return -EINVAL;
+ }
+ }
+
+ old_prog = xchg(&adapter->xdp_prog, prog);
+ need_reset = (!!prog != !!old_prog);
+
+ /* device is up and bpf is added/removed, must setup the RX queues */
+ if (need_reset && running) {
+ igb_close(dev);
+ } else {
+ for (i = 0; i < adapter->num_rx_queues; i++)
+ (void)xchg(&adapter->rx_ring[i]->xdp_prog,
+ adapter->xdp_prog);
+ }
+
+ if (old_prog)
+ bpf_prog_put(old_prog);
+
+ /* bpf is just replaced, RXQ and MTU are already setup */
+ if (!need_reset)
+ return 0;
+
+ if (running)
+ igb_open(dev);
+
+ return 0;
+}
+
+static int igb_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+ switch (xdp->command) {
+ case XDP_SETUP_PROG:
+ return igb_xdp_setup(dev, xdp);
+ default:
+ return -EINVAL;
+ }
+}
+
+static void igb_xdp_ring_update_tail(struct igb_ring *ring)
+{
+ /* Force memory writes to complete before letting h/w know there
+ * are new descriptors to fetch.
+ */
+ wmb();
+ writel(ring->next_to_use, ring->tail);
+}
+
+static struct igb_ring *igb_xdp_tx_queue_mapping(struct igb_adapter *adapter)
+{
+ unsigned int r_idx = smp_processor_id();
+
+ if (r_idx >= adapter->num_tx_queues)
+ r_idx = r_idx % adapter->num_tx_queues;
+
+ return adapter->tx_ring[r_idx];
+}
+
+static int igb_xdp_xmit_back(struct igb_adapter *adapter, struct xdp_buff *xdp)
+{
+ struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
+ int cpu = smp_processor_id();
+ struct igb_ring *tx_ring;
+ struct netdev_queue *nq;
+ u32 ret;
+
+ if (unlikely(!xdpf))
+ return IGB_XDP_CONSUMED;
+
+ /* During program transitions its possible adapter->xdp_prog is assigned
+ * but ring has not been configured yet. In this case simply abort xmit.
+ */
+ tx_ring = adapter->xdp_prog ? igb_xdp_tx_queue_mapping(adapter) : NULL;
+ if (unlikely(!tx_ring))
+ return IGB_XDP_CONSUMED;
+
+ nq = txring_txq(tx_ring);
+ __netif_tx_lock(nq, cpu);
+ /* Avoid transmit queue timeout since we share it with the slow path */
+ nq->trans_start = jiffies;
+ ret = igb_xmit_xdp_ring(adapter, tx_ring, xdpf);
+ __netif_tx_unlock(nq);
+
+ return ret;
+}
+
+static int igb_xdp_xmit(struct net_device *dev, int n,
+ struct xdp_frame **frames, u32 flags)
+{
+ struct igb_adapter *adapter = netdev_priv(dev);
+ int cpu = smp_processor_id();
+ struct igb_ring *tx_ring;
+ struct netdev_queue *nq;
+ int drops = 0;
+ int i;
+
+ if (unlikely(test_bit(__IGB_DOWN, &adapter->state)))
+ return -ENETDOWN;
+
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+ return -EINVAL;
+
+ /* During program transitions its possible adapter->xdp_prog is assigned
+ * but ring has not been configured yet. In this case simply abort xmit.
+ */
+ tx_ring = adapter->xdp_prog ? igb_xdp_tx_queue_mapping(adapter) : NULL;
+ if (unlikely(!tx_ring))
+ return -ENXIO;
+
+ nq = txring_txq(tx_ring);
+ __netif_tx_lock(nq, cpu);
+
+ /* Avoid transmit queue timeout since we share it with the slow path */
+ nq->trans_start = jiffies;
+
+ for (i = 0; i < n; i++) {
+ struct xdp_frame *xdpf = frames[i];
+ int err;
+
+ err = igb_xmit_xdp_ring(adapter, tx_ring, xdpf);
+ if (err != IGB_XDP_TX) {
+ xdp_return_frame_rx_napi(xdpf);
+ drops++;
+ }
+ }
+
+ __netif_tx_unlock(nq);
+
+ if (unlikely(flags & XDP_XMIT_FLUSH))
+ igb_xdp_ring_update_tail(tx_ring);
+
+ return n - drops;
}
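[Editor's note: the ndo_bpf/ndo_xdp_xmit hooks wired up below are the entry points for XDP programs on this driver. A minimal, hypothetical XDP program (not part of this patch) that igb_run_xdp() would execute per received frame could look like:

/* xdp_pass.c - illustrative only */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int xdp_pass_all(struct xdp_md *ctx)
{
	/* Let every frame continue to the normal network stack. */
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";

Once compiled with clang targeting BPF, it can be attached with something like "ip link set dev <igb-interface> xdp obj xdp_pass.o sec xdp"; returning XDP_TX or XDP_REDIRECT instead would exercise the igb_xdp_xmit_back()/igb_xdp_xmit() paths added above.]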
static const struct net_device_ops igb_netdev_ops = {
@@ -2891,6 +3012,8 @@
.ndo_fdb_add = igb_ndo_fdb_add,
.ndo_features_check = igb_features_check,
.ndo_setup_tc = igb_setup_tc,
+ .ndo_bpf = igb_xdp,
+ .ndo_xdp_xmit = igb_xdp_xmit,
};
/**
@@ -2915,7 +3038,7 @@
fw.invm_img_type);
break;
}
- /* fall through */
+ fallthrough;
default:
/* if option is rom valid, display its version too */
if (fw.or_valid) {
@@ -3157,7 +3280,7 @@
NETIF_F_HW_CSUM;
if (hw->mac.type >= e1000_82576)
- netdev->features |= NETIF_F_SCTP_CRC;
+ netdev->features |= NETIF_F_SCTP_CRC | NETIF_F_GSO_UDP_L4;
if (hw->mac.type >= e1000_i350)
netdev->features |= NETIF_F_HW_TC;
@@ -3431,7 +3554,9 @@
"Width x1" : "unknown"), netdev->dev_addr);
}
- if ((hw->mac.type >= e1000_i210 ||
+ if ((hw->mac.type == e1000_82576 &&
+ rd32(E1000_EECD) & E1000_EECD_PRES) ||
+ (hw->mac.type >= e1000_i210 ||
igb_get_flash_presence_i210(hw))) {
ret_val = igb_read_part_string(hw, part_str,
E1000_PBANUM_LENGTH);
@@ -3478,7 +3603,7 @@
}
}
- dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NEVER_SKIP);
+ dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NO_DIRECT_COMPLETE);
pm_runtime_put_noidle(&pdev->dev);
return 0;
@@ -3517,6 +3642,7 @@
struct net_device *netdev = pci_get_drvdata(pdev);
struct igb_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
+ unsigned long flags;
/* reclaim resources allocated to VFs */
if (adapter->vf_data) {
@@ -3529,12 +3655,13 @@
pci_disable_sriov(pdev);
msleep(500);
}
-
+ spin_lock_irqsave(&adapter->vfs_lock, flags);
kfree(adapter->vf_mac_list);
adapter->vf_mac_list = NULL;
kfree(adapter->vf_data);
adapter->vf_data = NULL;
adapter->vfs_allocated_count = 0;
+ spin_unlock_irqrestore(&adapter->vfs_lock, flags);
wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
wrfl();
msleep(100);
@@ -3730,8 +3857,9 @@
struct pci_dev *pdev = adapter->pdev;
struct e1000_hw *hw = &adapter->hw;
- /* Virtualization features not supported on i210 family. */
- if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211))
+ /* Virtualization features not supported on i210 and 82580 family. */
+ if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211) ||
+ (hw->mac.type == e1000_82580))
return;
/* Of the below we really only want the effect of getting
@@ -3767,13 +3895,13 @@
max_rss_queues = 1;
break;
}
- /* fall through */
+ fallthrough;
case e1000_82576:
if (!!adapter->vfs_allocated_count) {
max_rss_queues = 2;
break;
}
- /* fall through */
+ fallthrough;
case e1000_82580:
case e1000_i354:
default:
@@ -3849,12 +3977,14 @@
/* set default work limits */
adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
- adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
- VLAN_HLEN;
+ adapter->max_frame_size = netdev->mtu + IGB_ETH_PKT_HDR_PAD;
adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
spin_lock_init(&adapter->nfc_lock);
spin_lock_init(&adapter->stats64_lock);
+
+ /* init spinlock to avoid concurrency of VF resources */
+ spin_lock_init(&adapter->vfs_lock);
#ifdef CONFIG_PCI_IOV
switch (hw->mac.type) {
case e1000_82576:
@@ -3912,6 +4042,7 @@
/**
* igb_open - Called when a network interface is made active
* @netdev: network interface device structure
+ * @resuming: indicates whether we are in a resume call
*
* Returns 0 on success, negative value on failure
*
@@ -4029,6 +4160,7 @@
/**
* igb_close - Disables a network interface
* @netdev: network interface device structure
+ * @suspending: indicates we are in a suspend call
*
* Returns 0, this is not allowed to fail
*
@@ -4222,6 +4354,7 @@
**/
int igb_setup_rx_resources(struct igb_ring *rx_ring)
{
+ struct igb_adapter *adapter = netdev_priv(rx_ring->netdev);
struct device *dev = rx_ring->dev;
int size;
@@ -4243,6 +4376,13 @@
rx_ring->next_to_alloc = 0;
rx_ring->next_to_clean = 0;
rx_ring->next_to_use = 0;
+
+ rx_ring->xdp_prog = adapter->xdp_prog;
+
+ /* XDP RX-queue info */
+ if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
+ rx_ring->queue_index) < 0)
+ goto err;
return 0;
@@ -4362,8 +4502,7 @@
else
mrqc |= E1000_MRQC_ENABLE_VMDQ;
} else {
- if (hw->mac.type != e1000_i211)
- mrqc |= E1000_MRQC_ENABLE_RSS_MQ;
+ mrqc |= E1000_MRQC_ENABLE_RSS_MQ;
}
igb_vmm_control(adapter);
@@ -4502,6 +4641,37 @@
}
/**
+ * igb_setup_srrctl - configure the split and replication receive control
+ * registers
+ * @adapter: Board private structure
+ * @ring: receive ring to be configured
+ **/
+void igb_setup_srrctl(struct igb_adapter *adapter, struct igb_ring *ring)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ int reg_idx = ring->reg_idx;
+ u32 srrctl = 0;
+
+ srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
+ if (ring_uses_large_buffer(ring))
+ srrctl |= IGB_RXBUFFER_3072 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
+ else
+ srrctl |= IGB_RXBUFFER_2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
+ srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
+ if (hw->mac.type >= e1000_82580)
+ srrctl |= E1000_SRRCTL_TIMESTAMP;
+ /* Only set Drop Enable if VFs allocated, or we are supporting multiple
+ * queues and rx flow control is disabled
+ */
+ if (adapter->vfs_allocated_count ||
+ (!(hw->fc.current_mode & e1000_fc_rx_pause) &&
+ adapter->num_rx_queues > 1))
+ srrctl |= E1000_SRRCTL_DROP_EN;
+
+ wr32(E1000_SRRCTL(reg_idx), srrctl);
+}
+
+/**
* igb_configure_rx_ring - Configure a receive ring after Reset
* @adapter: board private structure
* @ring: receive ring to be configured
@@ -4515,7 +4685,11 @@
union e1000_adv_rx_desc *rx_desc;
u64 rdba = ring->dma;
int reg_idx = ring->reg_idx;
- u32 srrctl = 0, rxdctl = 0;
+ u32 rxdctl = 0;
+
+ xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
+ WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+ MEM_TYPE_PAGE_SHARED, NULL));
/* disable the queue */
wr32(E1000_RXDCTL(reg_idx), 0);
@@ -4533,19 +4707,7 @@
writel(0, ring->tail);
/* set descriptor configuration */
- srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
- if (ring_uses_large_buffer(ring))
- srrctl |= IGB_RXBUFFER_3072 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
- else
- srrctl |= IGB_RXBUFFER_2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
- srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
- if (hw->mac.type >= e1000_82580)
- srrctl |= E1000_SRRCTL_TIMESTAMP;
- /* Only set Drop Enable if we are supporting multiple queues */
- if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
- srrctl |= E1000_SRRCTL_DROP_EN;
-
- wr32(E1000_SRRCTL(reg_idx), srrctl);
+ igb_setup_srrctl(adapter, ring);
/* set filtering for VMDQ pools */
igb_set_vmolr(adapter, reg_idx & 0x7, true);
@@ -4570,6 +4732,10 @@
static void igb_set_rx_buffer_len(struct igb_adapter *adapter,
struct igb_ring *rx_ring)
{
+#if (PAGE_SIZE < 8192)
+ struct e1000_hw *hw = &adapter->hw;
+#endif
+
/* set build_skb and buffer size flags */
clear_ring_build_skb_enabled(rx_ring);
clear_ring_uses_large_buffer(rx_ring);
@@ -4580,10 +4746,9 @@
set_ring_build_skb_enabled(rx_ring);
#if (PAGE_SIZE < 8192)
- if (adapter->max_frame_size <= IGB_MAX_FRAME_BUILD_SKB)
- return;
-
- set_ring_uses_large_buffer(rx_ring);
+ if (adapter->max_frame_size > IGB_MAX_FRAME_BUILD_SKB ||
+ rd32(E1000_RCTL) & E1000_RCTL_SBP)
+ set_ring_uses_large_buffer(rx_ring);
#endif
}
@@ -4661,8 +4826,11 @@
while (i != tx_ring->next_to_use) {
union e1000_adv_tx_desc *eop_desc, *tx_desc;
- /* Free all the Tx ring sk_buffs */
- dev_kfree_skb_any(tx_buffer->skb);
+ /* Free all the Tx ring sk_buffs or xdp frames */
+ if (tx_buffer->type == IGB_TYPE_SKB)
+ dev_kfree_skb_any(tx_buffer->skb);
+ else
+ xdp_return_frame(tx_buffer->xdpf);
/* unmap skb header data */
dma_unmap_single(tx_ring->dev,
@@ -4735,6 +4903,8 @@
{
igb_clean_rx_ring(rx_ring);
+ rx_ring->xdp_prog = NULL;
+ xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
vfree(rx_ring->rx_buffer_info);
rx_ring->rx_buffer_info = NULL;
@@ -4771,8 +4941,7 @@
{
u16 i = rx_ring->next_to_clean;
- if (rx_ring->skb)
- dev_kfree_skb(rx_ring->skb);
+ dev_kfree_skb(rx_ring->skb);
rx_ring->skb = NULL;
/* Free all the Rx ring sk_buffs */
@@ -4896,14 +5065,14 @@
/* VLAN filtering needed for VLAN prio filter */
if (adapter->netdev->features & NETIF_F_NTUPLE)
break;
- /* fall through */
+ fallthrough;
case e1000_82576:
case e1000_82580:
case e1000_i354:
/* VLAN filtering needed for pool filtering */
if (adapter->vfs_allocated_count)
break;
- /* fall through */
+ fallthrough;
default:
return 1;
}
@@ -5183,7 +5352,7 @@
case e1000_media_type_copper:
if (!hw->mac.get_link_status)
return true;
- /* fall through */
+ fallthrough;
case e1000_media_type_internal_serdes:
hw->mac.ops.check_for_link(hw);
link_active = !hw->mac.get_link_status;
@@ -5247,7 +5416,7 @@
/**
* igb_watchdog - Timer Call-back
- * @data: pointer to adapter cast into an unsigned long
+ * @t: pointer to timer_list containing our private info pointer
**/
static void igb_watchdog(struct timer_list *t)
{
@@ -5346,7 +5515,8 @@
break;
}
- if (adapter->link_speed != SPEED_1000)
+ if (adapter->link_speed != SPEED_1000 ||
+ !hw->phy.ops.read_reg)
goto no_wait;
/* wait for Remote receiver status OK */
@@ -5714,8 +5884,8 @@
* should have been handled by the upper layers.
*/
if (tx_ring->launchtime_enable) {
- ts = ns_to_timespec64(first->skb->tstamp);
- first->skb->tstamp = 0;
+ ts = ktime_to_timespec64(first->skb->tstamp);
+ skb_txtime_consumed(first->skb);
context_desc->seqnum_seed = cpu_to_le32(ts.tv_nsec / 32);
} else {
context_desc->seqnum_seed = 0;
@@ -5735,6 +5905,7 @@
} ip;
union {
struct tcphdr *tcp;
+ struct udphdr *udp;
unsigned char *hdr;
} l4;
u32 paylen, l4_offset;
@@ -5754,7 +5925,8 @@
l4.hdr = skb_checksum_start(skb);
/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
- type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
+ type_tucmd = (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ?
+ E1000_ADVTXD_TUCMD_L4T_UDP : E1000_ADVTXD_TUCMD_L4T_TCP;
/* initialize outer IP header fields */
if (ip.v4->version == 4) {
@@ -5782,12 +5954,19 @@
/* determine offset of inner transport header */
l4_offset = l4.hdr - skb->data;
- /* compute length of segmentation header */
- *hdr_len = (l4.tcp->doff * 4) + l4_offset;
-
/* remove payload length from inner checksum */
paylen = skb->len - l4_offset;
- csum_replace_by_diff(&l4.tcp->check, htonl(paylen));
+ if (type_tucmd & E1000_ADVTXD_TUCMD_L4T_TCP) {
+ /* compute length of segmentation header */
+ *hdr_len = (l4.tcp->doff * 4) + l4_offset;
+ csum_replace_by_diff(&l4.tcp->check,
+ (__force __wsum)htonl(paylen));
+ } else {
+ /* compute length of segmentation header */
+ *hdr_len = sizeof(*l4.udp) + l4_offset;
+ csum_replace_by_diff(&l4.udp->check,
+ (__force __wsum)htonl(paylen));
+ }
/* update gso size and bytecount with header size */
first->gso_segs = skb_shinfo(skb)->gso_segs;
@@ -5834,7 +6013,7 @@
switch (skb->csum_offset) {
case offsetof(struct tcphdr, check):
type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
- /* fall through */
+ fallthrough;
case offsetof(struct udphdr, check):
break;
case offsetof(struct sctphdr, checksum):
@@ -5846,7 +6025,7 @@
type_tucmd = E1000_ADVTXD_TUCMD_L4T_SCTP;
break;
}
- /* fall through */
+ fallthrough;
default:
skb_checksum_help(skb);
goto csum_failed;
@@ -5958,7 +6137,7 @@
struct sk_buff *skb = first->skb;
struct igb_tx_buffer *tx_buffer;
union e1000_adv_tx_desc *tx_desc;
- struct skb_frag_struct *frag;
+ skb_frag_t *frag;
dma_addr_t dma;
unsigned int data_len, size;
u32 tx_flags = first->tx_flags;
@@ -6035,6 +6214,8 @@
/* set the timestamp */
first->time_stamp = jiffies;
+ skb_tx_timestamp(skb);
+
/* Force memory writes to complete before letting h/w know there
* are new descriptors to fetch. (Only applicable for weak-ordered
* memory model archs, such as IA-64).
@@ -6056,13 +6237,8 @@
/* Make sure there is space in the ring for the next send. */
igb_maybe_stop_tx(tx_ring, DESC_NEEDED);
- if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) {
+ if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) {
writel(i, tx_ring->tail);
-
- /* we need this if more than one processor can write to our tail
- * at a time, it synchronizes IO on IA64/Altix systems
- */
- mmiowb();
}
return 0;
@@ -6099,6 +6275,80 @@
return -1;
}
+int igb_xmit_xdp_ring(struct igb_adapter *adapter,
+ struct igb_ring *tx_ring,
+ struct xdp_frame *xdpf)
+{
+ union e1000_adv_tx_desc *tx_desc;
+ u32 len, cmd_type, olinfo_status;
+ struct igb_tx_buffer *tx_buffer;
+ dma_addr_t dma;
+ u16 i;
+
+ len = xdpf->len;
+
+ if (unlikely(!igb_desc_unused(tx_ring)))
+ return IGB_XDP_CONSUMED;
+
+ dma = dma_map_single(tx_ring->dev, xdpf->data, len, DMA_TO_DEVICE);
+ if (dma_mapping_error(tx_ring->dev, dma))
+ return IGB_XDP_CONSUMED;
+
+ /* record the location of the first descriptor for this packet */
+ tx_buffer = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
+ tx_buffer->bytecount = len;
+ tx_buffer->gso_segs = 1;
+ tx_buffer->protocol = 0;
+
+ i = tx_ring->next_to_use;
+ tx_desc = IGB_TX_DESC(tx_ring, i);
+
+ dma_unmap_len_set(tx_buffer, len, len);
+ dma_unmap_addr_set(tx_buffer, dma, dma);
+ tx_buffer->type = IGB_TYPE_XDP;
+ tx_buffer->xdpf = xdpf;
+
+ tx_desc->read.buffer_addr = cpu_to_le64(dma);
+
+ /* put descriptor type bits */
+ cmd_type = E1000_ADVTXD_DTYP_DATA |
+ E1000_ADVTXD_DCMD_DEXT |
+ E1000_ADVTXD_DCMD_IFCS;
+ cmd_type |= len | IGB_TXD_DCMD;
+ tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+
+ olinfo_status = len << E1000_ADVTXD_PAYLEN_SHIFT;
+ /* 82575 requires a unique index per ring */
+ if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
+ olinfo_status |= tx_ring->reg_idx << 4;
+
+ tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
+
+ netdev_tx_sent_queue(txring_txq(tx_ring), tx_buffer->bytecount);
+
+ /* set the timestamp */
+ tx_buffer->time_stamp = jiffies;
+
+ /* Avoid any potential race with xdp_xmit and cleanup */
+ smp_wmb();
+
+ /* set next_to_watch value indicating a packet is present */
+ i++;
+ if (i == tx_ring->count)
+ i = 0;
+
+ tx_buffer->next_to_watch = tx_desc;
+ tx_ring->next_to_use = i;
+
+ /* Make sure there is space in the ring for the next send. */
+ igb_maybe_stop_tx(tx_ring, DESC_NEEDED);
+
+ if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more())
+ writel(i, tx_ring->tail);
+
+ return IGB_XDP_TX;
+}
+
netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
struct igb_ring *tx_ring)
{
@@ -6117,7 +6367,8 @@
* otherwise try next time
*/
for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
- count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
+ count += TXD_USE_COUNT(skb_frag_size(
+ &skb_shinfo(skb)->frags[f]));
if (igb_maybe_stop_tx(tx_ring, count + 3)) {
/* this is a hard error */
@@ -6126,6 +6377,7 @@
/* record the location of the first descriptor for this packet */
first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
+ first->type = IGB_TYPE_SKB;
first->skb = skb;
first->bytecount = skb->len;
first->gso_segs = 1;
@@ -6162,8 +6414,6 @@
goto out_drop;
else if (!tso)
igb_tx_csum(tx_ring, first);
-
- skb_tx_timestamp(skb);
if (igb_tx_map(tx_ring, first, hdr_len))
goto cleanup_tx_tstamp;
@@ -6215,8 +6465,9 @@
/**
* igb_tx_timeout - Respond to a Tx Hang
* @netdev: network interface device structure
+ * @txqueue: number of the Tx queue that hung (unused)
**/
-static void igb_tx_timeout(struct net_device *netdev)
+static void igb_tx_timeout(struct net_device *netdev, unsigned int __always_unused txqueue)
{
struct igb_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
@@ -6277,8 +6528,22 @@
static int igb_change_mtu(struct net_device *netdev, int new_mtu)
{
struct igb_adapter *adapter = netdev_priv(netdev);
- struct pci_dev *pdev = adapter->pdev;
- int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+ int max_frame = new_mtu + IGB_ETH_PKT_HDR_PAD;
+
+ if (adapter->xdp_prog) {
+ int i;
+
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ struct igb_ring *ring = adapter->rx_ring[i];
+
+ if (max_frame > igb_rx_bufsz(ring)) {
+ netdev_warn(adapter->netdev,
+ "Requested MTU size is not supported with XDP. Max frame size is %d\n",
+ max_frame);
+ return -EINVAL;
+ }
+ }
+ }
/* adjust max frame to be at least the size of a standard frame */
if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN))
@@ -6293,8 +6558,8 @@
if (netif_running(netdev))
igb_down(adapter);
- dev_info(&pdev->dev, "changing MTU from %d to %d\n",
- netdev->mtu, new_mtu);
+ netdev_dbg(netdev, "changing MTU from %d to %d\n",
+ netdev->mtu, new_mtu);
netdev->mtu = new_mtu;
if (netif_running(netdev))
@@ -6738,7 +7003,7 @@
igb_setup_dca(adapter);
break;
}
- /* Fall Through since DCA is disabled. */
+ fallthrough; /* since DCA is disabled. */
case DCA_PROVIDER_REMOVE:
if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
/* without this a class_device is left
@@ -7157,7 +7422,7 @@
{
struct e1000_hw *hw = &adapter->hw;
unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
- u32 reg, msgbuf[3];
+ u32 reg, msgbuf[3] = {};
u8 *addr = (u8 *)(&msgbuf[1]);
/* process all the same items cleared in a function level reset */
@@ -7191,7 +7456,7 @@
for (i = 0; i < hw->mac.rar_entry_count; i++) {
adapter->mac_table[i].state &= ~IGB_MAC_STATE_IN_USE;
- memset(adapter->mac_table[i].addr, 0, ETH_ALEN);
+ eth_zero_addr(adapter->mac_table[i].addr);
adapter->mac_table[i].queue = 0;
igb_rar_set_index(adapter, i);
}
@@ -7340,7 +7605,7 @@
} else {
adapter->mac_table[i].state = 0;
adapter->mac_table[i].queue = 0;
- memset(adapter->mac_table[i].addr, 0, ETH_ALEN);
+ eth_zero_addr(adapter->mac_table[i].addr);
}
igb_rar_set_index(adapter, i);
@@ -7600,8 +7865,10 @@
static void igb_msg_task(struct igb_adapter *adapter)
{
struct e1000_hw *hw = &adapter->hw;
+ unsigned long flags;
u32 vf;
+ spin_lock_irqsave(&adapter->vfs_lock, flags);
for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
/* process any reset requests */
if (!igb_check_for_rst(hw, vf))
@@ -7615,6 +7882,7 @@
if (!igb_check_for_ack(hw, vf))
igb_rcv_ack_from_vf(adapter, vf);
}
+ spin_unlock_irqrestore(&adapter->vfs_lock, flags);
}
/**
@@ -7778,11 +8046,13 @@
if (!clean_complete)
return budget;
- /* If not enough Rx work done, exit the polling mode */
- napi_complete_done(napi, work_done);
- igb_ring_irq_enable(q_vector);
+ /* Exit the polling mode, but don't re-enable interrupts if stack might
+ * poll us due to busy-polling
+ */
+ if (likely(napi_complete_done(napi, work_done)))
+ igb_ring_irq_enable(q_vector);
- return 0;
+ return work_done;
}
/**
@@ -7831,7 +8101,10 @@
total_packets += tx_buffer->gso_segs;
/* free the skb */
- napi_consume_skb(tx_buffer->skb, napi_budget);
+ if (tx_buffer->type == IGB_TYPE_SKB)
+ napi_consume_skb(tx_buffer->skb, napi_budget);
+ else
+ xdp_return_frame(tx_buffer->xdpf);
/* unmap skb header data */
dma_unmap_single(tx_ring->dev,
@@ -7990,7 +8263,8 @@
return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
}
-static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer)
+static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer,
+ int rx_buf_pgcnt)
{
unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
struct page *page = rx_buffer->page;
@@ -8001,7 +8275,7 @@
#if (PAGE_SIZE < 8192)
/* if we are only owner of page we can reuse it */
- if (unlikely((page_ref_count(page) - pagecnt_bias) > 1))
+ if (unlikely((rx_buf_pgcnt - pagecnt_bias) > 1))
return false;
#else
#define IGB_LAST_OFFSET \
@@ -8015,8 +8289,8 @@
* the pagecnt_bias and page count so that we fully restock the
* number of references the driver holds.
*/
- if (unlikely(!pagecnt_bias)) {
- page_ref_add(page, USHRT_MAX);
+ if (unlikely(pagecnt_bias == 1)) {
+ page_ref_add(page, USHRT_MAX - 1);
rx_buffer->pagecnt_bias = USHRT_MAX;
}
@@ -8055,23 +8329,21 @@
static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring,
struct igb_rx_buffer *rx_buffer,
- union e1000_adv_rx_desc *rx_desc,
- unsigned int size)
+ struct xdp_buff *xdp,
+ union e1000_adv_rx_desc *rx_desc)
{
- void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
#if (PAGE_SIZE < 8192)
unsigned int truesize = igb_rx_pg_size(rx_ring) / 2;
#else
- unsigned int truesize = SKB_DATA_ALIGN(size);
+ unsigned int truesize = SKB_DATA_ALIGN(xdp->data_end -
+ xdp->data_hard_start);
#endif
+ unsigned int size = xdp->data_end - xdp->data;
unsigned int headlen;
struct sk_buff *skb;
/* prefetch first cache line of first page */
- prefetch(va);
-#if L1_CACHE_BYTES < 128
- prefetch(va + L1_CACHE_BYTES);
-#endif
+ net_prefetch(xdp->data);
/* allocate a skb to store the frags */
skb = napi_alloc_skb(&rx_ring->q_vector->napi, IGB_RX_HDR_LEN);
@@ -8079,24 +8351,25 @@
return NULL;
if (unlikely(igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP))) {
- igb_ptp_rx_pktstamp(rx_ring->q_vector, va, skb);
- va += IGB_TS_HDR_LEN;
- size -= IGB_TS_HDR_LEN;
+ if (!igb_ptp_rx_pktstamp(rx_ring->q_vector, xdp->data, skb)) {
+ xdp->data += IGB_TS_HDR_LEN;
+ size -= IGB_TS_HDR_LEN;
+ }
}
/* Determine available headroom for copy */
headlen = size;
if (headlen > IGB_RX_HDR_LEN)
- headlen = eth_get_headlen(va, IGB_RX_HDR_LEN);
+ headlen = eth_get_headlen(skb->dev, xdp->data, IGB_RX_HDR_LEN);
/* align pull length to size of long to optimize memcpy performance */
- memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
+ memcpy(__skb_put(skb, headlen), xdp->data, ALIGN(headlen, sizeof(long)));
/* update all of the pointers */
size -= headlen;
if (size) {
skb_add_rx_frag(skb, 0, rx_buffer->page,
- (va + headlen) - page_address(rx_buffer->page),
+ (xdp->data + headlen) - page_address(rx_buffer->page),
size, truesize);
#if (PAGE_SIZE < 8192)
rx_buffer->page_offset ^= truesize;
@@ -8112,37 +8385,38 @@
static struct sk_buff *igb_build_skb(struct igb_ring *rx_ring,
struct igb_rx_buffer *rx_buffer,
- union e1000_adv_rx_desc *rx_desc,
- unsigned int size)
+ struct xdp_buff *xdp,
+ union e1000_adv_rx_desc *rx_desc)
{
- void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
#if (PAGE_SIZE < 8192)
unsigned int truesize = igb_rx_pg_size(rx_ring) / 2;
#else
unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
- SKB_DATA_ALIGN(IGB_SKB_PAD + size);
+ SKB_DATA_ALIGN(xdp->data_end -
+ xdp->data_hard_start);
#endif
+ unsigned int metasize = xdp->data - xdp->data_meta;
struct sk_buff *skb;
/* prefetch first cache line of first page */
- prefetch(va);
-#if L1_CACHE_BYTES < 128
- prefetch(va + L1_CACHE_BYTES);
-#endif
+ net_prefetch(xdp->data_meta);
/* build an skb around the page buffer */
- skb = build_skb(va - IGB_SKB_PAD, truesize);
+ skb = build_skb(xdp->data_hard_start, truesize);
if (unlikely(!skb))
return NULL;
/* update pointers within the skb to store the data */
- skb_reserve(skb, IGB_SKB_PAD);
- __skb_put(skb, size);
+ skb_reserve(skb, xdp->data - xdp->data_hard_start);
+ __skb_put(skb, xdp->data_end - xdp->data);
+
+ if (metasize)
+ skb_metadata_set(skb, metasize);
/* pull timestamp out of packet data */
if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
- igb_ptp_rx_pktstamp(rx_ring->q_vector, skb->data, skb);
- __skb_pull(skb, IGB_TS_HDR_LEN);
+ if (!igb_ptp_rx_pktstamp(rx_ring->q_vector, skb->data, skb))
+ __skb_pull(skb, IGB_TS_HDR_LEN);
}
/* update buffer offset */
@@ -8153,6 +8427,81 @@
#endif
return skb;
+}
+
+static struct sk_buff *igb_run_xdp(struct igb_adapter *adapter,
+ struct igb_ring *rx_ring,
+ struct xdp_buff *xdp)
+{
+ int err, result = IGB_XDP_PASS;
+ struct bpf_prog *xdp_prog;
+ u32 act;
+
+ rcu_read_lock();
+ xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+
+ if (!xdp_prog)
+ goto xdp_out;
+
+ prefetchw(xdp->data_hard_start); /* xdp_frame write */
+
+ act = bpf_prog_run_xdp(xdp_prog, xdp);
+ switch (act) {
+ case XDP_PASS:
+ break;
+ case XDP_TX:
+ result = igb_xdp_xmit_back(adapter, xdp);
+ if (result == IGB_XDP_CONSUMED)
+ goto out_failure;
+ break;
+ case XDP_REDIRECT:
+ err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog);
+ if (err)
+ goto out_failure;
+ result = IGB_XDP_REDIR;
+ break;
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ fallthrough;
+ case XDP_ABORTED:
+out_failure:
+ trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+ fallthrough;
+ case XDP_DROP:
+ result = IGB_XDP_CONSUMED;
+ break;
+ }
+xdp_out:
+ rcu_read_unlock();
+ return ERR_PTR(-result);
+}
+
+static unsigned int igb_rx_frame_truesize(struct igb_ring *rx_ring,
+ unsigned int size)
+{
+ unsigned int truesize;
+
+#if (PAGE_SIZE < 8192)
+ truesize = igb_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */
+#else
+ truesize = ring_uses_build_skb(rx_ring) ?
+ SKB_DATA_ALIGN(IGB_SKB_PAD + size) +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
+ SKB_DATA_ALIGN(size);
+#endif
+ return truesize;
+}
+
+static void igb_rx_buffer_flip(struct igb_ring *rx_ring,
+ struct igb_rx_buffer *rx_buffer,
+ unsigned int size)
+{
+ unsigned int truesize = igb_rx_frame_truesize(rx_ring, size);
+#if (PAGE_SIZE < 8192)
+ rx_buffer->page_offset ^= truesize;
+#else
+ rx_buffer->page_offset += truesize;
+#endif
}
static inline void igb_rx_checksum(struct igb_ring *ring,
@@ -8209,7 +8558,6 @@
* igb_is_non_eop - process handling of non-EOP buffers
* @rx_ring: Rx ring being processed
* @rx_desc: Rx descriptor for current buffer
- * @skb: current socket buffer containing buffer in progress
*
* This function updates next to clean. If the buffer is an EOP buffer
* this function exits returning false, otherwise it will place the
@@ -8251,6 +8599,10 @@
union e1000_adv_rx_desc *rx_desc,
struct sk_buff *skb)
{
+ /* XDP packets use error pointer so abort at this point */
+ if (IS_ERR(skb))
+ return true;
+
if (unlikely((igb_test_staterr(rx_desc,
E1000_RXDEXT_ERR_FRAME_ERR_MASK)))) {
struct net_device *netdev = rx_ring->netdev;
@@ -8297,7 +8649,7 @@
if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags))
- vid = be16_to_cpu(rx_desc->wb.upper.vlan);
+ vid = be16_to_cpu((__force __be16)rx_desc->wb.upper.vlan);
else
vid = le16_to_cpu(rx_desc->wb.upper.vlan);
@@ -8309,12 +8661,23 @@
skb->protocol = eth_type_trans(skb, rx_ring->netdev);
}
+static unsigned int igb_rx_offset(struct igb_ring *rx_ring)
+{
+ return ring_uses_build_skb(rx_ring) ? IGB_SKB_PAD : 0;
+}
+
static struct igb_rx_buffer *igb_get_rx_buffer(struct igb_ring *rx_ring,
- const unsigned int size)
+ const unsigned int size, int *rx_buf_pgcnt)
{
struct igb_rx_buffer *rx_buffer;
rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+ *rx_buf_pgcnt =
+#if (PAGE_SIZE < 8192)
+ page_count(rx_buffer->page);
+#else
+ 0;
+#endif
prefetchw(rx_buffer->page);
/* we are reusing so sync this buffer for CPU use */
@@ -8330,9 +8693,9 @@
}
static void igb_put_rx_buffer(struct igb_ring *rx_ring,
- struct igb_rx_buffer *rx_buffer)
+ struct igb_rx_buffer *rx_buffer, int rx_buf_pgcnt)
{
- if (igb_can_reuse_rx_page(rx_buffer)) {
+ if (igb_can_reuse_rx_page(rx_buffer, rx_buf_pgcnt)) {
/* hand second half of page back to the ring */
igb_reuse_rx_page(rx_ring, rx_buffer);
} else {
@@ -8352,10 +8715,21 @@
static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
{
+ struct igb_adapter *adapter = q_vector->adapter;
struct igb_ring *rx_ring = q_vector->rx.ring;
struct sk_buff *skb = rx_ring->skb;
unsigned int total_bytes = 0, total_packets = 0;
u16 cleaned_count = igb_desc_unused(rx_ring);
+ unsigned int xdp_xmit = 0;
+ struct xdp_buff xdp;
+ int rx_buf_pgcnt;
+
+ xdp.rxq = &rx_ring->xdp_rxq;
+
+ /* Frame size depend on rx_ring setup when PAGE_SIZE=4K */
+#if (PAGE_SIZE < 8192)
+ xdp.frame_sz = igb_rx_frame_truesize(rx_ring, 0);
+#endif
while (likely(total_packets < budget)) {
union e1000_adv_rx_desc *rx_desc;
@@ -8379,16 +8753,41 @@
*/
dma_rmb();
- rx_buffer = igb_get_rx_buffer(rx_ring, size);
+ rx_buffer = igb_get_rx_buffer(rx_ring, size, &rx_buf_pgcnt);
/* retrieve a buffer from the ring */
- if (skb)
+ if (!skb) {
+ xdp.data = page_address(rx_buffer->page) +
+ rx_buffer->page_offset;
+ xdp.data_meta = xdp.data;
+ xdp.data_hard_start = xdp.data -
+ igb_rx_offset(rx_ring);
+ xdp.data_end = xdp.data + size;
+#if (PAGE_SIZE > 4096)
+ /* At larger PAGE_SIZE, frame_sz depend on len size */
+ xdp.frame_sz = igb_rx_frame_truesize(rx_ring, size);
+#endif
+ skb = igb_run_xdp(adapter, rx_ring, &xdp);
+ }
+
+ if (IS_ERR(skb)) {
+ unsigned int xdp_res = -PTR_ERR(skb);
+
+ if (xdp_res & (IGB_XDP_TX | IGB_XDP_REDIR)) {
+ xdp_xmit |= xdp_res;
+ igb_rx_buffer_flip(rx_ring, rx_buffer, size);
+ } else {
+ rx_buffer->pagecnt_bias++;
+ }
+ total_packets++;
+ total_bytes += size;
+ } else if (skb)
igb_add_rx_frag(rx_ring, rx_buffer, skb, size);
else if (ring_uses_build_skb(rx_ring))
- skb = igb_build_skb(rx_ring, rx_buffer, rx_desc, size);
+ skb = igb_build_skb(rx_ring, rx_buffer, &xdp, rx_desc);
else
skb = igb_construct_skb(rx_ring, rx_buffer,
- rx_desc, size);
+ &xdp, rx_desc);
/* exit if we failed to retrieve a buffer */
if (!skb) {
@@ -8397,7 +8796,7 @@
break;
}
- igb_put_rx_buffer(rx_ring, rx_buffer);
+ igb_put_rx_buffer(rx_ring, rx_buffer, rx_buf_pgcnt);
cleaned_count++;
/* fetch next buffer in frame if non-eop */
@@ -8428,6 +8827,15 @@
/* place incomplete frames back on ring for completion */
rx_ring->skb = skb;
+ if (xdp_xmit & IGB_XDP_REDIR)
+ xdp_do_flush();
+
+ if (xdp_xmit & IGB_XDP_TX) {
+ struct igb_ring *tx_ring = igb_xdp_tx_queue_mapping(adapter);
+
+ igb_xdp_ring_update_tail(tx_ring);
+ }
+
u64_stats_update_begin(&rx_ring->rx_syncp);
rx_ring->rx_stats.packets += total_packets;
rx_ring->rx_stats.bytes += total_bytes;
@@ -8439,11 +8847,6 @@
igb_alloc_rx_buffers(rx_ring, cleaned_count);
return total_packets;
-}
-
-static inline unsigned int igb_rx_offset(struct igb_ring *rx_ring)
-{
- return ring_uses_build_skb(rx_ring) ? IGB_SKB_PAD : 0;
}
static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
@@ -8482,14 +8885,16 @@
bi->dma = dma;
bi->page = page;
bi->page_offset = igb_rx_offset(rx_ring);
- bi->pagecnt_bias = 1;
+ page_ref_add(page, USHRT_MAX - 1);
+ bi->pagecnt_bias = USHRT_MAX;
return true;
}
/**
- * igb_alloc_rx_buffers - Replace used receive buffers; packet split
- * @adapter: address of board private structure
+ * igb_alloc_rx_buffers - Replace used receive buffers
+ * @rx_ring: rx descriptor ring to allocate new receive buffers
+ * @cleaned_count: count of buffers to allocate
**/
void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
{
@@ -8558,9 +8963,9 @@
/**
* igb_mii_ioctl -
- * @netdev:
- * @ifreq:
- * @cmd:
+ * @netdev: pointer to netdev struct
+ * @ifr: interface structure
+ * @cmd: ioctl command to execute
**/
static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
{
@@ -8588,9 +8993,9 @@
/**
* igb_ioctl -
- * @netdev:
- * @ifreq:
- * @cmd:
+ * @netdev: pointer to netdev struct
+ * @ifr: interface structure
+ * @cmd: ioctl command to execute
**/
static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
{
@@ -8875,7 +9280,7 @@
return __igb_shutdown(to_pci_dev(dev), NULL, 0);
}
-static int __maybe_unused igb_resume(struct device *dev)
+static int __maybe_unused __igb_resume(struct device *dev, bool rpm)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct net_device *netdev = pci_get_drvdata(pdev);
@@ -8918,21 +9323,27 @@
wr32(E1000_WUS, ~0);
- rtnl_lock();
+ if (!rpm)
+ rtnl_lock();
if (!err && netif_running(netdev))
err = __igb_open(netdev, true);
if (!err)
netif_device_attach(netdev);
- rtnl_unlock();
+ if (!rpm)
+ rtnl_unlock();
return err;
}
+static int __maybe_unused igb_resume(struct device *dev)
+{
+ return __igb_resume(dev, false);
+}
+
static int __maybe_unused igb_runtime_idle(struct device *dev)
{
- struct pci_dev *pdev = to_pci_dev(dev);
- struct net_device *netdev = pci_get_drvdata(pdev);
+ struct net_device *netdev = dev_get_drvdata(dev);
struct igb_adapter *adapter = netdev_priv(netdev);
if (!igb_has_link(adapter))
@@ -8948,7 +9359,7 @@
static int __maybe_unused igb_runtime_resume(struct device *dev)
{
- return igb_resume(dev);
+ return __igb_resume(dev, true);
}
static void igb_shutdown(struct pci_dev *pdev)
@@ -9046,6 +9457,11 @@
struct net_device *netdev = pci_get_drvdata(pdev);
struct igb_adapter *adapter = netdev_priv(netdev);
+ if (state == pci_channel_io_normal) {
+ dev_warn(&pdev->dev, "Non-correctable non-fatal error reported.\n");
+ return PCI_ERS_RESULT_CAN_RECOVER;
+ }
+
netif_device_detach(netdev);
if (state == pci_channel_io_perm_failure)
@@ -9064,7 +9480,7 @@
* @pdev: Pointer to PCI device
*
* Restart the card from scratch, as if from a cold-boot. Implementation
- * resembles the first-half of the igb_resume routine.
+ * resembles the first-half of the __igb_resume routine.
**/
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
{
@@ -9072,7 +9488,6 @@
struct igb_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
pci_ers_result_t result;
- int err;
if (pci_enable_device_mem(pdev)) {
dev_err(&pdev->dev,
@@ -9096,14 +9511,6 @@
result = PCI_ERS_RESULT_RECOVERED;
}
- err = pci_cleanup_aer_uncorrect_error_status(pdev);
- if (err) {
- dev_err(&pdev->dev,
- "pci_cleanup_aer_uncorrect_error_status failed 0x%0x\n",
- err);
- /* non-fatal, continue */
- }
-
return result;
}
@@ -9113,7 +9520,7 @@
*
* This callback is called when the error recovery driver tells us that
* its OK to resume normal operation. Implementation resembles the
- * second-half of the igb_resume routine.
+ * second-half of the __igb_resume routine.
*/
static void igb_io_resume(struct pci_dev *pdev)
{
@@ -9415,13 +9822,13 @@
reg = rd32(E1000_DTXCTL);
reg |= E1000_DTXCTL_VLAN_ADDED;
wr32(E1000_DTXCTL, reg);
- /* Fall through */
+ fallthrough;
case e1000_82580:
/* enable replication vlan tag stripping */
reg = rd32(E1000_RPLOLR);
reg |= E1000_RPLOLR_STRVLAN;
wr32(E1000_RPLOLR, reg);
- /* Fall through */
+ fallthrough;
case e1000_i350:
/* none of the above registers are supported by i350 */
break;
@@ -9443,11 +9850,10 @@
struct e1000_hw *hw = &adapter->hw;
u32 dmac_thr;
u16 hwm;
+ u32 reg;
if (hw->mac.type > e1000_82580) {
if (adapter->flags & IGB_FLAG_DMAC) {
- u32 reg;
-
/* force threshold to 0. */
wr32(E1000_DMCTXTH, 0);
@@ -9480,7 +9886,6 @@
/* Disable BMC-to-OS Watchdog Enable */
if (hw->mac.type != e1000_i354)
reg &= ~E1000_DMACR_DC_BMC2OSW_EN;
-
wr32(E1000_DMACR, reg);
/* no lower threshold to disable
@@ -9497,12 +9902,12 @@
*/
wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
(IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
+ }
- /* make low power state decision controlled
- * by DMA coal
- */
+ if (hw->mac.type >= e1000_i210 ||
+ (adapter->flags & IGB_FLAG_DMAC)) {
reg = rd32(E1000_PCIEMISC);
- reg &= ~E1000_PCIEMISC_LX_DECISION;
+ reg |= E1000_PCIEMISC_LX_DECISION;
wr32(E1000_PCIEMISC, reg);
} /* endif adapter->dmac is not disabled */
} else if (hw->mac.type == e1000_82580) {
--
Gitblit v1.6.2