From 6778948f9de86c3cfaf36725a7c87dcff9ba247f Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Mon, 11 Dec 2023 08:20:59 +0000 Subject: [PATCH] kernel_5.10 no rt --- kernel/drivers/net/ethernet/intel/igb/igb_main.c | 886 ++++++++++++++++++++++++++++++++++++++++++---------------- 1 files changed, 640 insertions(+), 246 deletions(-) diff --git a/kernel/drivers/net/ethernet/intel/igb/igb_main.c b/kernel/drivers/net/ethernet/intel/igb/igb_main.c index 74b50f1..f24f1a8 100644 --- a/kernel/drivers/net/ethernet/intel/igb/igb_main.c +++ b/kernel/drivers/net/ethernet/intel/igb/igb_main.c @@ -30,6 +30,8 @@ #include <linux/if_ether.h> #include <linux/aer.h> #include <linux/prefetch.h> +#include <linux/bpf.h> +#include <linux/bpf_trace.h> #include <linux/pm_runtime.h> #include <linux/etherdevice.h> #ifdef CONFIG_IGB_DCA @@ -37,12 +39,6 @@ #endif #include <linux/i2c.h> #include "igb.h" - -#define MAJ 5 -#define MIN 4 -#define BUILD 0 -#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \ -__stringify(BUILD) "-k" enum queue_mode { QUEUE_MODE_STRICT_PRIORITY, @@ -55,7 +51,6 @@ }; char igb_driver_name[] = "igb"; -char igb_driver_version[] = DRV_VERSION; static const char igb_driver_string[] = "Intel(R) Gigabit Ethernet Network Driver"; static const char igb_copyright[] = @@ -146,7 +141,7 @@ static bool igb_clean_tx_irq(struct igb_q_vector *, int); static int igb_clean_rx_irq(struct igb_q_vector *, int); static int igb_ioctl(struct net_device *, struct ifreq *, int cmd); -static void igb_tx_timeout(struct net_device *); +static void igb_tx_timeout(struct net_device *, unsigned int txqueue); static void igb_reset_task(struct work_struct *); static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features); @@ -239,8 +234,7 @@ MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>"); MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver"); -MODULE_LICENSE("GPL"); -MODULE_VERSION(DRV_VERSION); +MODULE_LICENSE("GPL v2"); #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK) static int debug = -1; @@ -557,8 +551,7 @@ /** * igb_get_i2c_data - Reads the I2C SDA data bit - * @hw: pointer to hardware structure - * @i2cctl: Current value of I2CCTL register + * @data: opaque pointer to adapter struct * * Returns the I2C data bit value **/ @@ -666,8 +659,7 @@ { int ret; - pr_info("%s - version %s\n", - igb_driver_string, igb_driver_version); + pr_info("%s\n", igb_driver_string); pr_info("%s\n", igb_copyright); #ifdef CONFIG_IGB_DCA @@ -720,14 +712,13 @@ adapter->rx_ring[i]->reg_idx = rbase_offset + Q_IDX_82576(i); } - /* Fall through */ + fallthrough; case e1000_82575: case e1000_82580: case e1000_i350: case e1000_i354: case e1000_i210: case e1000_i211: - /* Fall through */ default: for (; i < adapter->num_rx_queues; i++) adapter->rx_ring[i]->reg_idx = rbase_offset + i; @@ -753,6 +744,8 @@ struct net_device *netdev = igb->netdev; hw->hw_addr = NULL; netdev_err(netdev, "PCIe link lost\n"); + WARN(pci_device_is_present(igb->pdev), + "igb: Failed to read reg 0x%x!\n", reg); } return value; @@ -1196,15 +1189,15 @@ { struct igb_q_vector *q_vector; struct igb_ring *ring; - int ring_count, size; + int ring_count; + size_t size; /* igb only supports 1 Tx and/or 1 Rx queue per vector */ if (txr_count > 1 || rxr_count > 1) return -ENOMEM; ring_count = txr_count + rxr_count; - size = sizeof(struct igb_q_vector) + - (sizeof(struct igb_ring) * ring_count); + size = struct_size(q_vector, ring, ring_count); /* allocate q_vector and rings */ q_vector = 
adapter->q_vector[v_idx]; @@ -1858,13 +1851,12 @@ * configuration' in respect to these parameters. */ - netdev_dbg(netdev, "Qav Tx mode: cbs %s, launchtime %s, queue %d \ - idleslope %d sendslope %d hiCredit %d \ - locredit %d\n", - (ring->cbs_enable) ? "enabled" : "disabled", - (ring->launchtime_enable) ? "enabled" : "disabled", queue, - ring->idleslope, ring->sendslope, ring->hicredit, - ring->locredit); + netdev_dbg(netdev, "Qav Tx mode: cbs %s, launchtime %s, queue %d idleslope %d sendslope %d hiCredit %d locredit %d\n", + ring->cbs_enable ? "enabled" : "disabled", + ring->launchtime_enable ? "enabled" : "disabled", + queue, + ring->idleslope, ring->sendslope, + ring->hicredit, ring->locredit); } static int igb_save_txtime_params(struct igb_adapter *adapter, int queue, @@ -1943,7 +1935,7 @@ val = rd32(E1000_RXPBS); val &= ~I210_RXPBSIZE_MASK; - val |= I210_RXPBSIZE_PB_32KB; + val |= I210_RXPBSIZE_PB_30KB; wr32(E1000_RXPBS, val); /* Section 8.12.9 states that MAX_TPKT_SIZE from DTXMXPKTSZ @@ -2237,7 +2229,6 @@ void igb_reinit_locked(struct igb_adapter *adapter) { - WARN_ON(in_interrupt()); while (test_and_set_bit(__IGB_RESETTING, &adapter->state)) usleep_range(1000, 2000); igb_down(adapter); @@ -2379,7 +2370,7 @@ adapter->ei.get_invariants(hw); adapter->flags &= ~IGB_FLAG_MEDIA_RESET; } - if ((mac->type == e1000_82575) && + if ((mac->type == e1000_82575 || mac->type == e1000_i350) && (adapter->flags & IGB_FLAG_MAS_ENABLE)) { igb_enable_mas(adapter); } @@ -2490,13 +2481,14 @@ else igb_reset(adapter); - return 0; + return 1; } static int igb_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, - u16 flags) + u16 flags, + struct netlink_ext_ack *extack) { /* guarantee we can provide a unique filter for the unicast address */ if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) { @@ -2524,6 +2516,7 @@ if (unlikely(mac_hdr_len > IGB_MAX_MAC_HDR_LEN)) return features & ~(NETIF_F_HW_CSUM | NETIF_F_SCTP_CRC | + NETIF_F_GSO_UDP_L4 | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_TSO | NETIF_F_TSO6); @@ -2532,6 +2525,7 @@ if (unlikely(network_hdr_len > IGB_MAX_NETWORK_HDR_LEN)) return features & ~(NETIF_F_HW_CSUM | NETIF_F_SCTP_CRC | + NETIF_F_GSO_UDP_L4 | NETIF_F_TSO | NETIF_F_TSO6); @@ -2586,13 +2580,15 @@ #define VLAN_PRIO_FULL_MASK (0x07) static int igb_parse_cls_flower(struct igb_adapter *adapter, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, int traffic_class, struct igb_nfc_filter *input) { + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct flow_dissector *dissector = rule->match.dissector; struct netlink_ext_ack *extack = f->common.extack; - if (f->dissector->used_keys & + if (dissector->used_keys & ~(BIT(FLOW_DISSECTOR_KEY_BASIC) | BIT(FLOW_DISSECTOR_KEY_CONTROL) | BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | @@ -2602,78 +2598,61 @@ return -EOPNOTSUPP; } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { - struct flow_dissector_key_eth_addrs *key, *mask; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_match_eth_addrs match; - key = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ETH_ADDRS, - f->key); - mask = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ETH_ADDRS, - f->mask); - - if (!is_zero_ether_addr(mask->dst)) { - if (!is_broadcast_ether_addr(mask->dst)) { + flow_rule_match_eth_addrs(rule, &match); + if (!is_zero_ether_addr(match.mask->dst)) { + if (!is_broadcast_ether_addr(match.mask->dst)) { 
NL_SET_ERR_MSG_MOD(extack, "Only full masks are supported for destination MAC address"); return -EINVAL; } input->filter.match_flags |= IGB_FILTER_FLAG_DST_MAC_ADDR; - ether_addr_copy(input->filter.dst_addr, key->dst); + ether_addr_copy(input->filter.dst_addr, match.key->dst); } - if (!is_zero_ether_addr(mask->src)) { - if (!is_broadcast_ether_addr(mask->src)) { + if (!is_zero_ether_addr(match.mask->src)) { + if (!is_broadcast_ether_addr(match.mask->src)) { NL_SET_ERR_MSG_MOD(extack, "Only full masks are supported for source MAC address"); return -EINVAL; } input->filter.match_flags |= IGB_FILTER_FLAG_SRC_MAC_ADDR; - ether_addr_copy(input->filter.src_addr, key->src); + ether_addr_copy(input->filter.src_addr, match.key->src); } } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) { - struct flow_dissector_key_basic *key, *mask; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; - key = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, - f->key); - mask = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, - f->mask); - - if (mask->n_proto) { - if (mask->n_proto != ETHER_TYPE_FULL_MASK) { + flow_rule_match_basic(rule, &match); + if (match.mask->n_proto) { + if (match.mask->n_proto != ETHER_TYPE_FULL_MASK) { NL_SET_ERR_MSG_MOD(extack, "Only full mask is supported for EtherType filter"); return -EINVAL; } input->filter.match_flags |= IGB_FILTER_FLAG_ETHER_TYPE; - input->filter.etype = key->n_proto; + input->filter.etype = match.key->n_proto; } } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) { - struct flow_dissector_key_vlan *key, *mask; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_match_vlan match; - key = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_VLAN, - f->key); - mask = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_VLAN, - f->mask); - - if (mask->vlan_priority) { - if (mask->vlan_priority != VLAN_PRIO_FULL_MASK) { + flow_rule_match_vlan(rule, &match); + if (match.mask->vlan_priority) { + if (match.mask->vlan_priority != VLAN_PRIO_FULL_MASK) { NL_SET_ERR_MSG_MOD(extack, "Only full mask is supported for VLAN priority"); return -EINVAL; } input->filter.match_flags |= IGB_FILTER_FLAG_VLAN_TCI; - input->filter.vlan_tci = key->vlan_priority; + input->filter.vlan_tci = + (__force __be16)match.key->vlan_priority; } } @@ -2684,7 +2663,7 @@ } static int igb_configure_clsflower(struct igb_adapter *adapter, - struct tc_cls_flower_offload *cls_flower) + struct flow_cls_offload *cls_flower) { struct netlink_ext_ack *extack = cls_flower->common.extack; struct igb_nfc_filter *filter, *f; @@ -2746,7 +2725,7 @@ } static int igb_delete_clsflower(struct igb_adapter *adapter, - struct tc_cls_flower_offload *cls_flower) + struct flow_cls_offload *cls_flower) { struct igb_nfc_filter *filter; int err; @@ -2776,14 +2755,14 @@ } static int igb_setup_tc_cls_flower(struct igb_adapter *adapter, - struct tc_cls_flower_offload *cls_flower) + struct flow_cls_offload *cls_flower) { switch (cls_flower->command) { - case TC_CLSFLOWER_REPLACE: + case FLOW_CLS_REPLACE: return igb_configure_clsflower(adapter, cls_flower); - case TC_CLSFLOWER_DESTROY: + case FLOW_CLS_DESTROY: return igb_delete_clsflower(adapter, cls_flower); - case TC_CLSFLOWER_STATS: + case FLOW_CLS_STATS: return -EOPNOTSUPP; default: return -EOPNOTSUPP; @@ -2802,25 +2781,6 @@ case TC_SETUP_CLSFLOWER: return igb_setup_tc_cls_flower(adapter, type_data); - default: - return -EOPNOTSUPP; - } 
-} - -static int igb_setup_tc_block(struct igb_adapter *adapter, - struct tc_block_offload *f) -{ - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) - return -EOPNOTSUPP; - - switch (f->command) { - case TC_BLOCK_BIND: - return tcf_block_cb_register(f->block, igb_setup_tc_block_cb, - adapter, adapter, f->extack); - case TC_BLOCK_UNBIND: - tcf_block_cb_unregister(f->block, igb_setup_tc_block_cb, - adapter); - return 0; default: return -EOPNOTSUPP; } @@ -2849,6 +2809,8 @@ return 0; } +static LIST_HEAD(igb_block_cb_list); + static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { @@ -2858,13 +2820,168 @@ case TC_SETUP_QDISC_CBS: return igb_offload_cbs(adapter, type_data); case TC_SETUP_BLOCK: - return igb_setup_tc_block(adapter, type_data); + return flow_block_cb_setup_simple(type_data, + &igb_block_cb_list, + igb_setup_tc_block_cb, + adapter, adapter, true); + case TC_SETUP_QDISC_ETF: return igb_offload_txtime(adapter, type_data); default: return -EOPNOTSUPP; } +} + +static int igb_xdp_setup(struct net_device *dev, struct netdev_bpf *bpf) +{ + int i, frame_size = dev->mtu + IGB_ETH_PKT_HDR_PAD; + struct igb_adapter *adapter = netdev_priv(dev); + struct bpf_prog *prog = bpf->prog, *old_prog; + bool running = netif_running(dev); + bool need_reset; + + /* verify igb ring attributes are sufficient for XDP */ + for (i = 0; i < adapter->num_rx_queues; i++) { + struct igb_ring *ring = adapter->rx_ring[i]; + + if (frame_size > igb_rx_bufsz(ring)) { + NL_SET_ERR_MSG_MOD(bpf->extack, + "The RX buffer size is too small for the frame size"); + netdev_warn(dev, "XDP RX buffer size %d is too small for the frame size %d\n", + igb_rx_bufsz(ring), frame_size); + return -EINVAL; + } + } + + old_prog = xchg(&adapter->xdp_prog, prog); + need_reset = (!!prog != !!old_prog); + + /* device is up and bpf is added/removed, must setup the RX queues */ + if (need_reset && running) { + igb_close(dev); + } else { + for (i = 0; i < adapter->num_rx_queues; i++) + (void)xchg(&adapter->rx_ring[i]->xdp_prog, + adapter->xdp_prog); + } + + if (old_prog) + bpf_prog_put(old_prog); + + /* bpf is just replaced, RXQ and MTU are already setup */ + if (!need_reset) + return 0; + + if (running) + igb_open(dev); + + return 0; +} + +static int igb_xdp(struct net_device *dev, struct netdev_bpf *xdp) +{ + switch (xdp->command) { + case XDP_SETUP_PROG: + return igb_xdp_setup(dev, xdp); + default: + return -EINVAL; + } +} + +static void igb_xdp_ring_update_tail(struct igb_ring *ring) +{ + /* Force memory writes to complete before letting h/w know there + * are new descriptors to fetch. + */ + wmb(); + writel(ring->next_to_use, ring->tail); +} + +static struct igb_ring *igb_xdp_tx_queue_mapping(struct igb_adapter *adapter) +{ + unsigned int r_idx = smp_processor_id(); + + if (r_idx >= adapter->num_tx_queues) + r_idx = r_idx % adapter->num_tx_queues; + + return adapter->tx_ring[r_idx]; +} + +static int igb_xdp_xmit_back(struct igb_adapter *adapter, struct xdp_buff *xdp) +{ + struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp); + int cpu = smp_processor_id(); + struct igb_ring *tx_ring; + struct netdev_queue *nq; + u32 ret; + + if (unlikely(!xdpf)) + return IGB_XDP_CONSUMED; + + /* During program transitions its possible adapter->xdp_prog is assigned + * but ring has not been configured yet. In this case simply abort xmit. + */ + tx_ring = adapter->xdp_prog ? 
igb_xdp_tx_queue_mapping(adapter) : NULL; + if (unlikely(!tx_ring)) + return IGB_XDP_CONSUMED; + + nq = txring_txq(tx_ring); + __netif_tx_lock(nq, cpu); + /* Avoid transmit queue timeout since we share it with the slow path */ + nq->trans_start = jiffies; + ret = igb_xmit_xdp_ring(adapter, tx_ring, xdpf); + __netif_tx_unlock(nq); + + return ret; +} + +static int igb_xdp_xmit(struct net_device *dev, int n, + struct xdp_frame **frames, u32 flags) +{ + struct igb_adapter *adapter = netdev_priv(dev); + int cpu = smp_processor_id(); + struct igb_ring *tx_ring; + struct netdev_queue *nq; + int drops = 0; + int i; + + if (unlikely(test_bit(__IGB_DOWN, &adapter->state))) + return -ENETDOWN; + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) + return -EINVAL; + + /* During program transitions its possible adapter->xdp_prog is assigned + * but ring has not been configured yet. In this case simply abort xmit. + */ + tx_ring = adapter->xdp_prog ? igb_xdp_tx_queue_mapping(adapter) : NULL; + if (unlikely(!tx_ring)) + return -ENXIO; + + nq = txring_txq(tx_ring); + __netif_tx_lock(nq, cpu); + + /* Avoid transmit queue timeout since we share it with the slow path */ + nq->trans_start = jiffies; + + for (i = 0; i < n; i++) { + struct xdp_frame *xdpf = frames[i]; + int err; + + err = igb_xmit_xdp_ring(adapter, tx_ring, xdpf); + if (err != IGB_XDP_TX) { + xdp_return_frame_rx_napi(xdpf); + drops++; + } + } + + __netif_tx_unlock(nq); + + if (unlikely(flags & XDP_XMIT_FLUSH)) + igb_xdp_ring_update_tail(tx_ring); + + return n - drops; } static const struct net_device_ops igb_netdev_ops = { @@ -2891,6 +3008,8 @@ .ndo_fdb_add = igb_ndo_fdb_add, .ndo_features_check = igb_features_check, .ndo_setup_tc = igb_setup_tc, + .ndo_bpf = igb_xdp, + .ndo_xdp_xmit = igb_xdp_xmit, }; /** @@ -2915,7 +3034,7 @@ fw.invm_img_type); break; } - /* fall through */ + fallthrough; default: /* if option is rom valid, display its version too */ if (fw.or_valid) { @@ -3157,7 +3276,7 @@ NETIF_F_HW_CSUM; if (hw->mac.type >= e1000_82576) - netdev->features |= NETIF_F_SCTP_CRC; + netdev->features |= NETIF_F_SCTP_CRC | NETIF_F_GSO_UDP_L4; if (hw->mac.type >= e1000_i350) netdev->features |= NETIF_F_HW_TC; @@ -3431,7 +3550,9 @@ "Width x1" : "unknown"), netdev->dev_addr); } - if ((hw->mac.type >= e1000_i210 || + if ((hw->mac.type == e1000_82576 && + rd32(E1000_EECD) & E1000_EECD_PRES) || + (hw->mac.type >= e1000_i210 || igb_get_flash_presence_i210(hw))) { ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH); @@ -3478,7 +3599,7 @@ } } - dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NEVER_SKIP); + dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NO_DIRECT_COMPLETE); pm_runtime_put_noidle(&pdev->dev); return 0; @@ -3517,6 +3638,7 @@ struct net_device *netdev = pci_get_drvdata(pdev); struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; + unsigned long flags; /* reclaim resources allocated to VFs */ if (adapter->vf_data) { @@ -3529,12 +3651,13 @@ pci_disable_sriov(pdev); msleep(500); } - + spin_lock_irqsave(&adapter->vfs_lock, flags); kfree(adapter->vf_mac_list); adapter->vf_mac_list = NULL; kfree(adapter->vf_data); adapter->vf_data = NULL; adapter->vfs_allocated_count = 0; + spin_unlock_irqrestore(&adapter->vfs_lock, flags); wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ); wrfl(); msleep(100); @@ -3694,7 +3817,9 @@ igb_release_hw_control(adapter); #ifdef CONFIG_PCI_IOV + rtnl_lock(); igb_disable_sriov(pdev); + rtnl_unlock(); #endif unregister_netdev(netdev); @@ -3767,13 +3892,13 @@ max_rss_queues = 1; break; } - /* 
fall through */ + fallthrough; case e1000_82576: if (!!adapter->vfs_allocated_count) { max_rss_queues = 2; break; } - /* fall through */ + fallthrough; case e1000_82580: case e1000_i354: default: @@ -3849,12 +3974,14 @@ /* set default work limits */ adapter->tx_work_limit = IGB_DEFAULT_TX_WORK; - adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + - VLAN_HLEN; + adapter->max_frame_size = netdev->mtu + IGB_ETH_PKT_HDR_PAD; adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; spin_lock_init(&adapter->nfc_lock); spin_lock_init(&adapter->stats64_lock); + + /* init spinlock to avoid concurrency of VF resources */ + spin_lock_init(&adapter->vfs_lock); #ifdef CONFIG_PCI_IOV switch (hw->mac.type) { case e1000_82576: @@ -3912,6 +4039,7 @@ /** * igb_open - Called when a network interface is made active * @netdev: network interface device structure + * @resuming: indicates whether we are in a resume call * * Returns 0 on success, negative value on failure * @@ -4029,6 +4157,7 @@ /** * igb_close - Disables a network interface * @netdev: network interface device structure + * @suspending: indicates we are in a suspend call * * Returns 0, this is not allowed to fail * @@ -4222,6 +4351,7 @@ **/ int igb_setup_rx_resources(struct igb_ring *rx_ring) { + struct igb_adapter *adapter = netdev_priv(rx_ring->netdev); struct device *dev = rx_ring->dev; int size; @@ -4243,6 +4373,13 @@ rx_ring->next_to_alloc = 0; rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; + + rx_ring->xdp_prog = adapter->xdp_prog; + + /* XDP RX-queue info */ + if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, + rx_ring->queue_index) < 0) + goto err; return 0; @@ -4362,8 +4499,7 @@ else mrqc |= E1000_MRQC_ENABLE_VMDQ; } else { - if (hw->mac.type != e1000_i211) - mrqc |= E1000_MRQC_ENABLE_RSS_MQ; + mrqc |= E1000_MRQC_ENABLE_RSS_MQ; } igb_vmm_control(adapter); @@ -4502,6 +4638,37 @@ } /** + * igb_setup_srrctl - configure the split and replication receive control + * registers + * @adapter: Board private structure + * @ring: receive ring to be configured + **/ +void igb_setup_srrctl(struct igb_adapter *adapter, struct igb_ring *ring) +{ + struct e1000_hw *hw = &adapter->hw; + int reg_idx = ring->reg_idx; + u32 srrctl = 0; + + srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; + if (ring_uses_large_buffer(ring)) + srrctl |= IGB_RXBUFFER_3072 >> E1000_SRRCTL_BSIZEPKT_SHIFT; + else + srrctl |= IGB_RXBUFFER_2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT; + srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; + if (hw->mac.type >= e1000_82580) + srrctl |= E1000_SRRCTL_TIMESTAMP; + /* Only set Drop Enable if VFs allocated, or we are supporting multiple + * queues and rx flow control is disabled + */ + if (adapter->vfs_allocated_count || + (!(hw->fc.current_mode & e1000_fc_rx_pause) && + adapter->num_rx_queues > 1)) + srrctl |= E1000_SRRCTL_DROP_EN; + + wr32(E1000_SRRCTL(reg_idx), srrctl); +} + +/** * igb_configure_rx_ring - Configure a receive ring after Reset * @adapter: board private structure * @ring: receive ring to be configured @@ -4515,7 +4682,11 @@ union e1000_adv_rx_desc *rx_desc; u64 rdba = ring->dma; int reg_idx = ring->reg_idx; - u32 srrctl = 0, rxdctl = 0; + u32 rxdctl = 0; + + xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq); + WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, + MEM_TYPE_PAGE_SHARED, NULL)); /* disable the queue */ wr32(E1000_RXDCTL(reg_idx), 0); @@ -4533,19 +4704,7 @@ writel(0, ring->tail); /* set descriptor configuration */ - srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; - if 
(ring_uses_large_buffer(ring)) - srrctl |= IGB_RXBUFFER_3072 >> E1000_SRRCTL_BSIZEPKT_SHIFT; - else - srrctl |= IGB_RXBUFFER_2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT; - srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; - if (hw->mac.type >= e1000_82580) - srrctl |= E1000_SRRCTL_TIMESTAMP; - /* Only set Drop Enable if we are supporting multiple queues */ - if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1) - srrctl |= E1000_SRRCTL_DROP_EN; - - wr32(E1000_SRRCTL(reg_idx), srrctl); + igb_setup_srrctl(adapter, ring); /* set filtering for VMDQ pools */ igb_set_vmolr(adapter, reg_idx & 0x7, true); @@ -4661,8 +4820,11 @@ while (i != tx_ring->next_to_use) { union e1000_adv_tx_desc *eop_desc, *tx_desc; - /* Free all the Tx ring sk_buffs */ - dev_kfree_skb_any(tx_buffer->skb); + /* Free all the Tx ring sk_buffs or xdp frames */ + if (tx_buffer->type == IGB_TYPE_SKB) + dev_kfree_skb_any(tx_buffer->skb); + else + xdp_return_frame(tx_buffer->xdpf); /* unmap skb header data */ dma_unmap_single(tx_ring->dev, @@ -4735,6 +4897,8 @@ { igb_clean_rx_ring(rx_ring); + rx_ring->xdp_prog = NULL; + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); vfree(rx_ring->rx_buffer_info); rx_ring->rx_buffer_info = NULL; @@ -4771,8 +4935,7 @@ { u16 i = rx_ring->next_to_clean; - if (rx_ring->skb) - dev_kfree_skb(rx_ring->skb); + dev_kfree_skb(rx_ring->skb); rx_ring->skb = NULL; /* Free all the Rx ring sk_buffs */ @@ -4896,14 +5059,14 @@ /* VLAN filtering needed for VLAN prio filter */ if (adapter->netdev->features & NETIF_F_NTUPLE) break; - /* fall through */ + fallthrough; case e1000_82576: case e1000_82580: case e1000_i354: /* VLAN filtering needed for pool filtering */ if (adapter->vfs_allocated_count) break; - /* fall through */ + fallthrough; default: return 1; } @@ -5183,7 +5346,7 @@ case e1000_media_type_copper: if (!hw->mac.get_link_status) return true; - /* fall through */ + fallthrough; case e1000_media_type_internal_serdes: hw->mac.ops.check_for_link(hw); link_active = !hw->mac.get_link_status; @@ -5247,7 +5410,7 @@ /** * igb_watchdog - Timer Call-back - * @data: pointer to adapter cast into an unsigned long + * @t: pointer to timer_list containing our private info pointer **/ static void igb_watchdog(struct timer_list *t) { @@ -5346,7 +5509,8 @@ break; } - if (adapter->link_speed != SPEED_1000) + if (adapter->link_speed != SPEED_1000 || + !hw->phy.ops.read_reg) goto no_wait; /* wait for Remote receiver status OK */ @@ -5714,8 +5878,8 @@ * should have been handled by the upper layers. */ if (tx_ring->launchtime_enable) { - ts = ns_to_timespec64(first->skb->tstamp); - first->skb->tstamp = 0; + ts = ktime_to_timespec64(first->skb->tstamp); + first->skb->tstamp = ktime_set(0, 0); context_desc->seqnum_seed = cpu_to_le32(ts.tv_nsec / 32); } else { context_desc->seqnum_seed = 0; @@ -5735,6 +5899,7 @@ } ip; union { struct tcphdr *tcp; + struct udphdr *udp; unsigned char *hdr; } l4; u32 paylen, l4_offset; @@ -5754,7 +5919,8 @@ l4.hdr = skb_checksum_start(skb); /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */ - type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP; + type_tucmd = (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ? 
+ E1000_ADVTXD_TUCMD_L4T_UDP : E1000_ADVTXD_TUCMD_L4T_TCP; /* initialize outer IP header fields */ if (ip.v4->version == 4) { @@ -5782,12 +5948,19 @@ /* determine offset of inner transport header */ l4_offset = l4.hdr - skb->data; - /* compute length of segmentation header */ - *hdr_len = (l4.tcp->doff * 4) + l4_offset; - /* remove payload length from inner checksum */ paylen = skb->len - l4_offset; - csum_replace_by_diff(&l4.tcp->check, htonl(paylen)); + if (type_tucmd & E1000_ADVTXD_TUCMD_L4T_TCP) { + /* compute length of segmentation header */ + *hdr_len = (l4.tcp->doff * 4) + l4_offset; + csum_replace_by_diff(&l4.tcp->check, + (__force __wsum)htonl(paylen)); + } else { + /* compute length of segmentation header */ + *hdr_len = sizeof(*l4.udp) + l4_offset; + csum_replace_by_diff(&l4.udp->check, + (__force __wsum)htonl(paylen)); + } /* update gso size and bytecount with header size */ first->gso_segs = skb_shinfo(skb)->gso_segs; @@ -5834,7 +6007,7 @@ switch (skb->csum_offset) { case offsetof(struct tcphdr, check): type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP; - /* fall through */ + fallthrough; case offsetof(struct udphdr, check): break; case offsetof(struct sctphdr, checksum): @@ -5846,7 +6019,7 @@ type_tucmd = E1000_ADVTXD_TUCMD_L4T_SCTP; break; } - /* fall through */ + fallthrough; default: skb_checksum_help(skb); goto csum_failed; @@ -5958,7 +6131,7 @@ struct sk_buff *skb = first->skb; struct igb_tx_buffer *tx_buffer; union e1000_adv_tx_desc *tx_desc; - struct skb_frag_struct *frag; + skb_frag_t *frag; dma_addr_t dma; unsigned int data_len, size; u32 tx_flags = first->tx_flags; @@ -6035,6 +6208,8 @@ /* set the timestamp */ first->time_stamp = jiffies; + skb_tx_timestamp(skb); + /* Force memory writes to complete before letting h/w know there * are new descriptors to fetch. (Only applicable for weak-ordered * memory model archs, such as IA-64). @@ -6056,13 +6231,8 @@ /* Make sure there is space in the ring for the next send. 
*/ igb_maybe_stop_tx(tx_ring, DESC_NEEDED); - if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) { + if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) { writel(i, tx_ring->tail); - - /* we need this if more than one processor can write to our tail - * at a time, it synchronizes IO on IA64/Altix systems - */ - mmiowb(); } return 0; @@ -6099,6 +6269,80 @@ return -1; } +int igb_xmit_xdp_ring(struct igb_adapter *adapter, + struct igb_ring *tx_ring, + struct xdp_frame *xdpf) +{ + union e1000_adv_tx_desc *tx_desc; + u32 len, cmd_type, olinfo_status; + struct igb_tx_buffer *tx_buffer; + dma_addr_t dma; + u16 i; + + len = xdpf->len; + + if (unlikely(!igb_desc_unused(tx_ring))) + return IGB_XDP_CONSUMED; + + dma = dma_map_single(tx_ring->dev, xdpf->data, len, DMA_TO_DEVICE); + if (dma_mapping_error(tx_ring->dev, dma)) + return IGB_XDP_CONSUMED; + + /* record the location of the first descriptor for this packet */ + tx_buffer = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; + tx_buffer->bytecount = len; + tx_buffer->gso_segs = 1; + tx_buffer->protocol = 0; + + i = tx_ring->next_to_use; + tx_desc = IGB_TX_DESC(tx_ring, i); + + dma_unmap_len_set(tx_buffer, len, len); + dma_unmap_addr_set(tx_buffer, dma, dma); + tx_buffer->type = IGB_TYPE_XDP; + tx_buffer->xdpf = xdpf; + + tx_desc->read.buffer_addr = cpu_to_le64(dma); + + /* put descriptor type bits */ + cmd_type = E1000_ADVTXD_DTYP_DATA | + E1000_ADVTXD_DCMD_DEXT | + E1000_ADVTXD_DCMD_IFCS; + cmd_type |= len | IGB_TXD_DCMD; + tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); + + olinfo_status = len << E1000_ADVTXD_PAYLEN_SHIFT; + /* 82575 requires a unique index per ring */ + if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags)) + olinfo_status |= tx_ring->reg_idx << 4; + + tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); + + netdev_tx_sent_queue(txring_txq(tx_ring), tx_buffer->bytecount); + + /* set the timestamp */ + tx_buffer->time_stamp = jiffies; + + /* Avoid any potential race with xdp_xmit and cleanup */ + smp_wmb(); + + /* set next_to_watch value indicating a packet is present */ + i++; + if (i == tx_ring->count) + i = 0; + + tx_buffer->next_to_watch = tx_desc; + tx_ring->next_to_use = i; + + /* Make sure there is space in the ring for the next send. 
*/ + igb_maybe_stop_tx(tx_ring, DESC_NEEDED); + + if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) + writel(i, tx_ring->tail); + + return IGB_XDP_TX; +} + netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, struct igb_ring *tx_ring) { @@ -6117,7 +6361,8 @@ * otherwise try next time */ for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) - count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size); + count += TXD_USE_COUNT(skb_frag_size( + &skb_shinfo(skb)->frags[f])); if (igb_maybe_stop_tx(tx_ring, count + 3)) { /* this is a hard error */ @@ -6126,6 +6371,7 @@ /* record the location of the first descriptor for this packet */ first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; + first->type = IGB_TYPE_SKB; first->skb = skb; first->bytecount = skb->len; first->gso_segs = 1; @@ -6162,8 +6408,6 @@ goto out_drop; else if (!tso) igb_tx_csum(tx_ring, first); - - skb_tx_timestamp(skb); if (igb_tx_map(tx_ring, first, hdr_len)) goto cleanup_tx_tstamp; @@ -6215,8 +6459,9 @@ /** * igb_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure + * @txqueue: number of the Tx queue that hung (unused) **/ -static void igb_tx_timeout(struct net_device *netdev) +static void igb_tx_timeout(struct net_device *netdev, unsigned int __always_unused txqueue) { struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; @@ -6277,8 +6522,22 @@ static int igb_change_mtu(struct net_device *netdev, int new_mtu) { struct igb_adapter *adapter = netdev_priv(netdev); - struct pci_dev *pdev = adapter->pdev; - int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; + int max_frame = new_mtu + IGB_ETH_PKT_HDR_PAD; + + if (adapter->xdp_prog) { + int i; + + for (i = 0; i < adapter->num_rx_queues; i++) { + struct igb_ring *ring = adapter->rx_ring[i]; + + if (max_frame > igb_rx_bufsz(ring)) { + netdev_warn(adapter->netdev, + "Requested MTU size is not supported with XDP. Max frame size is %d\n", + max_frame); + return -EINVAL; + } + } + } /* adjust max frame to be at least the size of a standard frame */ if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN)) @@ -6293,8 +6552,8 @@ if (netif_running(netdev)) igb_down(adapter); - dev_info(&pdev->dev, "changing MTU from %d to %d\n", - netdev->mtu, new_mtu); + netdev_dbg(netdev, "changing MTU from %d to %d\n", + netdev->mtu, new_mtu); netdev->mtu = new_mtu; if (netif_running(netdev)) @@ -6738,7 +6997,7 @@ igb_setup_dca(adapter); break; } - /* Fall Through since DCA is disabled. */ + fallthrough; /* since DCA is disabled. 
*/ case DCA_PROVIDER_REMOVE: if (adapter->flags & IGB_FLAG_DCA_ENABLED) { /* without this a class_device is left @@ -7157,7 +7416,7 @@ { struct e1000_hw *hw = &adapter->hw; unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses; - u32 reg, msgbuf[3]; + u32 reg, msgbuf[3] = {}; u8 *addr = (u8 *)(&msgbuf[1]); /* process all the same items cleared in a function level reset */ @@ -7191,7 +7450,7 @@ for (i = 0; i < hw->mac.rar_entry_count; i++) { adapter->mac_table[i].state &= ~IGB_MAC_STATE_IN_USE; - memset(adapter->mac_table[i].addr, 0, ETH_ALEN); + eth_zero_addr(adapter->mac_table[i].addr); adapter->mac_table[i].queue = 0; igb_rar_set_index(adapter, i); } @@ -7340,7 +7599,7 @@ } else { adapter->mac_table[i].state = 0; adapter->mac_table[i].queue = 0; - memset(adapter->mac_table[i].addr, 0, ETH_ALEN); + eth_zero_addr(adapter->mac_table[i].addr); } igb_rar_set_index(adapter, i); @@ -7600,8 +7859,10 @@ static void igb_msg_task(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; + unsigned long flags; u32 vf; + spin_lock_irqsave(&adapter->vfs_lock, flags); for (vf = 0; vf < adapter->vfs_allocated_count; vf++) { /* process any reset requests */ if (!igb_check_for_rst(hw, vf)) @@ -7615,6 +7876,7 @@ if (!igb_check_for_ack(hw, vf)) igb_rcv_ack_from_vf(adapter, vf); } + spin_unlock_irqrestore(&adapter->vfs_lock, flags); } /** @@ -7778,11 +8040,13 @@ if (!clean_complete) return budget; - /* If not enough Rx work done, exit the polling mode */ - napi_complete_done(napi, work_done); - igb_ring_irq_enable(q_vector); + /* Exit the polling mode, but don't re-enable interrupts if stack might + * poll us due to busy-polling + */ + if (likely(napi_complete_done(napi, work_done))) + igb_ring_irq_enable(q_vector); - return 0; + return work_done; } /** @@ -7831,7 +8095,10 @@ total_packets += tx_buffer->gso_segs; /* free the skb */ - napi_consume_skb(tx_buffer->skb, napi_budget); + if (tx_buffer->type == IGB_TYPE_SKB) + napi_consume_skb(tx_buffer->skb, napi_budget); + else + xdp_return_frame(tx_buffer->xdpf); /* unmap skb header data */ dma_unmap_single(tx_ring->dev, @@ -7990,7 +8257,8 @@ return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); } -static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer) +static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer, + int rx_buf_pgcnt) { unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; struct page *page = rx_buffer->page; @@ -8001,7 +8269,7 @@ #if (PAGE_SIZE < 8192) /* if we are only owner of page we can reuse it */ - if (unlikely((page_ref_count(page) - pagecnt_bias) > 1)) + if (unlikely((rx_buf_pgcnt - pagecnt_bias) > 1)) return false; #else #define IGB_LAST_OFFSET \ @@ -8015,8 +8283,8 @@ * the pagecnt_bias and page count so that we fully restock the * number of references the driver holds. 
*/ - if (unlikely(!pagecnt_bias)) { - page_ref_add(page, USHRT_MAX); + if (unlikely(pagecnt_bias == 1)) { + page_ref_add(page, USHRT_MAX - 1); rx_buffer->pagecnt_bias = USHRT_MAX; } @@ -8055,23 +8323,21 @@ static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring, struct igb_rx_buffer *rx_buffer, - union e1000_adv_rx_desc *rx_desc, - unsigned int size) + struct xdp_buff *xdp, + union e1000_adv_rx_desc *rx_desc) { - void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; #if (PAGE_SIZE < 8192) unsigned int truesize = igb_rx_pg_size(rx_ring) / 2; #else - unsigned int truesize = SKB_DATA_ALIGN(size); + unsigned int truesize = SKB_DATA_ALIGN(xdp->data_end - + xdp->data_hard_start); #endif + unsigned int size = xdp->data_end - xdp->data; unsigned int headlen; struct sk_buff *skb; /* prefetch first cache line of first page */ - prefetch(va); -#if L1_CACHE_BYTES < 128 - prefetch(va + L1_CACHE_BYTES); -#endif + net_prefetch(xdp->data); /* allocate a skb to store the frags */ skb = napi_alloc_skb(&rx_ring->q_vector->napi, IGB_RX_HDR_LEN); @@ -8079,24 +8345,25 @@ return NULL; if (unlikely(igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP))) { - igb_ptp_rx_pktstamp(rx_ring->q_vector, va, skb); - va += IGB_TS_HDR_LEN; - size -= IGB_TS_HDR_LEN; + if (!igb_ptp_rx_pktstamp(rx_ring->q_vector, xdp->data, skb)) { + xdp->data += IGB_TS_HDR_LEN; + size -= IGB_TS_HDR_LEN; + } } /* Determine available headroom for copy */ headlen = size; if (headlen > IGB_RX_HDR_LEN) - headlen = eth_get_headlen(va, IGB_RX_HDR_LEN); + headlen = eth_get_headlen(skb->dev, xdp->data, IGB_RX_HDR_LEN); /* align pull length to size of long to optimize memcpy performance */ - memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); + memcpy(__skb_put(skb, headlen), xdp->data, ALIGN(headlen, sizeof(long))); /* update all of the pointers */ size -= headlen; if (size) { skb_add_rx_frag(skb, 0, rx_buffer->page, - (va + headlen) - page_address(rx_buffer->page), + (xdp->data + headlen) - page_address(rx_buffer->page), size, truesize); #if (PAGE_SIZE < 8192) rx_buffer->page_offset ^= truesize; @@ -8112,37 +8379,38 @@ static struct sk_buff *igb_build_skb(struct igb_ring *rx_ring, struct igb_rx_buffer *rx_buffer, - union e1000_adv_rx_desc *rx_desc, - unsigned int size) + struct xdp_buff *xdp, + union e1000_adv_rx_desc *rx_desc) { - void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; #if (PAGE_SIZE < 8192) unsigned int truesize = igb_rx_pg_size(rx_ring) / 2; #else unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + - SKB_DATA_ALIGN(IGB_SKB_PAD + size); + SKB_DATA_ALIGN(xdp->data_end - + xdp->data_hard_start); #endif + unsigned int metasize = xdp->data - xdp->data_meta; struct sk_buff *skb; /* prefetch first cache line of first page */ - prefetch(va); -#if L1_CACHE_BYTES < 128 - prefetch(va + L1_CACHE_BYTES); -#endif + net_prefetch(xdp->data_meta); /* build an skb around the page buffer */ - skb = build_skb(va - IGB_SKB_PAD, truesize); + skb = build_skb(xdp->data_hard_start, truesize); if (unlikely(!skb)) return NULL; /* update pointers within the skb to store the data */ - skb_reserve(skb, IGB_SKB_PAD); - __skb_put(skb, size); + skb_reserve(skb, xdp->data - xdp->data_hard_start); + __skb_put(skb, xdp->data_end - xdp->data); + + if (metasize) + skb_metadata_set(skb, metasize); /* pull timestamp out of packet data */ if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { - igb_ptp_rx_pktstamp(rx_ring->q_vector, skb->data, skb); - __skb_pull(skb, IGB_TS_HDR_LEN); + if 
(!igb_ptp_rx_pktstamp(rx_ring->q_vector, skb->data, skb)) + __skb_pull(skb, IGB_TS_HDR_LEN); } /* update buffer offset */ @@ -8153,6 +8421,81 @@ #endif return skb; +} + +static struct sk_buff *igb_run_xdp(struct igb_adapter *adapter, + struct igb_ring *rx_ring, + struct xdp_buff *xdp) +{ + int err, result = IGB_XDP_PASS; + struct bpf_prog *xdp_prog; + u32 act; + + rcu_read_lock(); + xdp_prog = READ_ONCE(rx_ring->xdp_prog); + + if (!xdp_prog) + goto xdp_out; + + prefetchw(xdp->data_hard_start); /* xdp_frame write */ + + act = bpf_prog_run_xdp(xdp_prog, xdp); + switch (act) { + case XDP_PASS: + break; + case XDP_TX: + result = igb_xdp_xmit_back(adapter, xdp); + if (result == IGB_XDP_CONSUMED) + goto out_failure; + break; + case XDP_REDIRECT: + err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog); + if (err) + goto out_failure; + result = IGB_XDP_REDIR; + break; + default: + bpf_warn_invalid_xdp_action(act); + fallthrough; + case XDP_ABORTED: +out_failure: + trace_xdp_exception(rx_ring->netdev, xdp_prog, act); + fallthrough; + case XDP_DROP: + result = IGB_XDP_CONSUMED; + break; + } +xdp_out: + rcu_read_unlock(); + return ERR_PTR(-result); +} + +static unsigned int igb_rx_frame_truesize(struct igb_ring *rx_ring, + unsigned int size) +{ + unsigned int truesize; + +#if (PAGE_SIZE < 8192) + truesize = igb_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */ +#else + truesize = ring_uses_build_skb(rx_ring) ? + SKB_DATA_ALIGN(IGB_SKB_PAD + size) + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) : + SKB_DATA_ALIGN(size); +#endif + return truesize; +} + +static void igb_rx_buffer_flip(struct igb_ring *rx_ring, + struct igb_rx_buffer *rx_buffer, + unsigned int size) +{ + unsigned int truesize = igb_rx_frame_truesize(rx_ring, size); +#if (PAGE_SIZE < 8192) + rx_buffer->page_offset ^= truesize; +#else + rx_buffer->page_offset += truesize; +#endif } static inline void igb_rx_checksum(struct igb_ring *ring, @@ -8209,7 +8552,6 @@ * igb_is_non_eop - process handling of non-EOP buffers * @rx_ring: Rx ring being processed * @rx_desc: Rx descriptor for current buffer - * @skb: current socket buffer containing buffer in progress * * This function updates next to clean. If the buffer is an EOP buffer * this function exits returning false, otherwise it will place the @@ -8251,6 +8593,10 @@ union e1000_adv_rx_desc *rx_desc, struct sk_buff *skb) { + /* XDP packets use error pointer so abort at this point */ + if (IS_ERR(skb)) + return true; + if (unlikely((igb_test_staterr(rx_desc, E1000_RXDEXT_ERR_FRAME_ERR_MASK)))) { struct net_device *netdev = rx_ring->netdev; @@ -8297,7 +8643,7 @@ if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) && test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags)) - vid = be16_to_cpu(rx_desc->wb.upper.vlan); + vid = be16_to_cpu((__force __be16)rx_desc->wb.upper.vlan); else vid = le16_to_cpu(rx_desc->wb.upper.vlan); @@ -8309,12 +8655,23 @@ skb->protocol = eth_type_trans(skb, rx_ring->netdev); } +static unsigned int igb_rx_offset(struct igb_ring *rx_ring) +{ + return ring_uses_build_skb(rx_ring) ? 
IGB_SKB_PAD : 0; +} + static struct igb_rx_buffer *igb_get_rx_buffer(struct igb_ring *rx_ring, - const unsigned int size) + const unsigned int size, int *rx_buf_pgcnt) { struct igb_rx_buffer *rx_buffer; rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; + *rx_buf_pgcnt = +#if (PAGE_SIZE < 8192) + page_count(rx_buffer->page); +#else + 0; +#endif prefetchw(rx_buffer->page); /* we are reusing so sync this buffer for CPU use */ @@ -8330,9 +8687,9 @@ } static void igb_put_rx_buffer(struct igb_ring *rx_ring, - struct igb_rx_buffer *rx_buffer) + struct igb_rx_buffer *rx_buffer, int rx_buf_pgcnt) { - if (igb_can_reuse_rx_page(rx_buffer)) { + if (igb_can_reuse_rx_page(rx_buffer, rx_buf_pgcnt)) { /* hand second half of page back to the ring */ igb_reuse_rx_page(rx_ring, rx_buffer); } else { @@ -8352,10 +8709,21 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) { + struct igb_adapter *adapter = q_vector->adapter; struct igb_ring *rx_ring = q_vector->rx.ring; struct sk_buff *skb = rx_ring->skb; unsigned int total_bytes = 0, total_packets = 0; u16 cleaned_count = igb_desc_unused(rx_ring); + unsigned int xdp_xmit = 0; + struct xdp_buff xdp; + int rx_buf_pgcnt; + + xdp.rxq = &rx_ring->xdp_rxq; + + /* Frame size depend on rx_ring setup when PAGE_SIZE=4K */ +#if (PAGE_SIZE < 8192) + xdp.frame_sz = igb_rx_frame_truesize(rx_ring, 0); +#endif while (likely(total_packets < budget)) { union e1000_adv_rx_desc *rx_desc; @@ -8379,16 +8747,41 @@ */ dma_rmb(); - rx_buffer = igb_get_rx_buffer(rx_ring, size); + rx_buffer = igb_get_rx_buffer(rx_ring, size, &rx_buf_pgcnt); /* retrieve a buffer from the ring */ - if (skb) + if (!skb) { + xdp.data = page_address(rx_buffer->page) + + rx_buffer->page_offset; + xdp.data_meta = xdp.data; + xdp.data_hard_start = xdp.data - + igb_rx_offset(rx_ring); + xdp.data_end = xdp.data + size; +#if (PAGE_SIZE > 4096) + /* At larger PAGE_SIZE, frame_sz depend on len size */ + xdp.frame_sz = igb_rx_frame_truesize(rx_ring, size); +#endif + skb = igb_run_xdp(adapter, rx_ring, &xdp); + } + + if (IS_ERR(skb)) { + unsigned int xdp_res = -PTR_ERR(skb); + + if (xdp_res & (IGB_XDP_TX | IGB_XDP_REDIR)) { + xdp_xmit |= xdp_res; + igb_rx_buffer_flip(rx_ring, rx_buffer, size); + } else { + rx_buffer->pagecnt_bias++; + } + total_packets++; + total_bytes += size; + } else if (skb) igb_add_rx_frag(rx_ring, rx_buffer, skb, size); else if (ring_uses_build_skb(rx_ring)) - skb = igb_build_skb(rx_ring, rx_buffer, rx_desc, size); + skb = igb_build_skb(rx_ring, rx_buffer, &xdp, rx_desc); else skb = igb_construct_skb(rx_ring, rx_buffer, - rx_desc, size); + &xdp, rx_desc); /* exit if we failed to retrieve a buffer */ if (!skb) { @@ -8397,7 +8790,7 @@ break; } - igb_put_rx_buffer(rx_ring, rx_buffer); + igb_put_rx_buffer(rx_ring, rx_buffer, rx_buf_pgcnt); cleaned_count++; /* fetch next buffer in frame if non-eop */ @@ -8428,6 +8821,15 @@ /* place incomplete frames back on ring for completion */ rx_ring->skb = skb; + if (xdp_xmit & IGB_XDP_REDIR) + xdp_do_flush(); + + if (xdp_xmit & IGB_XDP_TX) { + struct igb_ring *tx_ring = igb_xdp_tx_queue_mapping(adapter); + + igb_xdp_ring_update_tail(tx_ring); + } + u64_stats_update_begin(&rx_ring->rx_syncp); rx_ring->rx_stats.packets += total_packets; rx_ring->rx_stats.bytes += total_bytes; @@ -8439,11 +8841,6 @@ igb_alloc_rx_buffers(rx_ring, cleaned_count); return total_packets; -} - -static inline unsigned int igb_rx_offset(struct igb_ring *rx_ring) -{ - return ring_uses_build_skb(rx_ring) ? 
IGB_SKB_PAD : 0; } static bool igb_alloc_mapped_page(struct igb_ring *rx_ring, @@ -8482,14 +8879,16 @@ bi->dma = dma; bi->page = page; bi->page_offset = igb_rx_offset(rx_ring); - bi->pagecnt_bias = 1; + page_ref_add(page, USHRT_MAX - 1); + bi->pagecnt_bias = USHRT_MAX; return true; } /** - * igb_alloc_rx_buffers - Replace used receive buffers; packet split - * @adapter: address of board private structure + * igb_alloc_rx_buffers - Replace used receive buffers + * @rx_ring: rx descriptor ring to allocate new receive buffers + * @cleaned_count: count of buffers to allocate **/ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count) { @@ -8558,9 +8957,9 @@ /** * igb_mii_ioctl - - * @netdev: - * @ifreq: - * @cmd: + * @netdev: pointer to netdev struct + * @ifr: interface structure + * @cmd: ioctl command to execute **/ static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) { @@ -8588,9 +8987,9 @@ /** * igb_ioctl - - * @netdev: - * @ifreq: - * @cmd: + * @netdev: pointer to netdev struct + * @ifr: interface structure + * @cmd: ioctl command to execute **/ static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) { @@ -8875,7 +9274,7 @@ return __igb_shutdown(to_pci_dev(dev), NULL, 0); } -static int __maybe_unused igb_resume(struct device *dev) +static int __maybe_unused __igb_resume(struct device *dev, bool rpm) { struct pci_dev *pdev = to_pci_dev(dev); struct net_device *netdev = pci_get_drvdata(pdev); @@ -8918,21 +9317,27 @@ wr32(E1000_WUS, ~0); - rtnl_lock(); + if (!rpm) + rtnl_lock(); if (!err && netif_running(netdev)) err = __igb_open(netdev, true); if (!err) netif_device_attach(netdev); - rtnl_unlock(); + if (!rpm) + rtnl_unlock(); return err; } +static int __maybe_unused igb_resume(struct device *dev) +{ + return __igb_resume(dev, false); +} + static int __maybe_unused igb_runtime_idle(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); - struct net_device *netdev = pci_get_drvdata(pdev); + struct net_device *netdev = dev_get_drvdata(dev); struct igb_adapter *adapter = netdev_priv(netdev); if (!igb_has_link(adapter)) @@ -8948,7 +9353,7 @@ static int __maybe_unused igb_runtime_resume(struct device *dev) { - return igb_resume(dev); + return __igb_resume(dev, true); } static void igb_shutdown(struct pci_dev *pdev) @@ -9064,7 +9469,7 @@ * @pdev: Pointer to PCI device * * Restart the card from scratch, as if from a cold-boot. Implementation - * resembles the first-half of the igb_resume routine. + * resembles the first-half of the __igb_resume routine. **/ static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev) { @@ -9072,7 +9477,6 @@ struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; pci_ers_result_t result; - int err; if (pci_enable_device_mem(pdev)) { dev_err(&pdev->dev, @@ -9096,14 +9500,6 @@ result = PCI_ERS_RESULT_RECOVERED; } - err = pci_cleanup_aer_uncorrect_error_status(pdev); - if (err) { - dev_err(&pdev->dev, - "pci_cleanup_aer_uncorrect_error_status failed 0x%0x\n", - err); - /* non-fatal, continue */ - } - return result; } @@ -9113,7 +9509,7 @@ * * This callback is called when the error recovery driver tells us that * its OK to resume normal operation. Implementation resembles the - * second-half of the igb_resume routine. + * second-half of the __igb_resume routine. 
*/ static void igb_io_resume(struct pci_dev *pdev) { @@ -9415,13 +9811,13 @@ reg = rd32(E1000_DTXCTL); reg |= E1000_DTXCTL_VLAN_ADDED; wr32(E1000_DTXCTL, reg); - /* Fall through */ + fallthrough; case e1000_82580: /* enable replication vlan tag stripping */ reg = rd32(E1000_RPLOLR); reg |= E1000_RPLOLR_STRVLAN; wr32(E1000_RPLOLR, reg); - /* Fall through */ + fallthrough; case e1000_i350: /* none of the above registers are supported by i350 */ break; @@ -9443,11 +9839,10 @@ struct e1000_hw *hw = &adapter->hw; u32 dmac_thr; u16 hwm; + u32 reg; if (hw->mac.type > e1000_82580) { if (adapter->flags & IGB_FLAG_DMAC) { - u32 reg; - /* force threshold to 0. */ wr32(E1000_DMCTXTH, 0); @@ -9480,7 +9875,6 @@ /* Disable BMC-to-OS Watchdog Enable */ if (hw->mac.type != e1000_i354) reg &= ~E1000_DMACR_DC_BMC2OSW_EN; - wr32(E1000_DMACR, reg); /* no lower threshold to disable @@ -9497,12 +9891,12 @@ */ wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE - (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6); + } - /* make low power state decision controlled - * by DMA coal - */ + if (hw->mac.type >= e1000_i210 || + (adapter->flags & IGB_FLAG_DMAC)) { reg = rd32(E1000_PCIEMISC); - reg &= ~E1000_PCIEMISC_LX_DECISION; + reg |= E1000_PCIEMISC_LX_DECISION; wr32(E1000_PCIEMISC, reg); } /* endif adapter->dmac is not disabled */ } else if (hw->mac.type == e1000_82580) { -- Gitblit v1.6.2
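
For a quick smoke test of the XDP paths this patch wires up (.ndo_bpf -> igb_xdp_setup() and .ndo_xdp_xmit -> igb_xdp_xmit()), a minimal user-space loader along the following lines can attach a program in native (driver) mode. This is only an illustrative sketch, not part of the patch: it assumes libbpf >= 1.0 and a pre-built ELF object "xdp_pass.o" containing an XDP program named "xdp_prog" (both names are placeholders), and it defaults to the interface name "eth0".

/* Illustrative only -- not part of this patch. Attaches a pre-built XDP
 * object to an igb interface in native (driver) mode so that the new
 * igb_xdp()/igb_xdp_setup() callback is exercised. "xdp_pass.o" and the
 * program name "xdp_prog" are placeholder assumptions.
 */
#include <stdio.h>
#include <net/if.h>
#include <linux/if_link.h>
#include <bpf/libbpf.h>

int main(int argc, char **argv)
{
	const char *ifname = argc > 1 ? argv[1] : "eth0";
	unsigned int ifindex = if_nametoindex(ifname);
	struct bpf_program *prog;
	struct bpf_object *obj;
	int err;

	if (!ifindex) {
		perror("if_nametoindex");
		return 1;
	}

	obj = bpf_object__open_file("xdp_pass.o", NULL);
	if (!obj) {
		perror("bpf_object__open_file");
		return 1;
	}

	err = bpf_object__load(obj);
	if (err) {
		fprintf(stderr, "bpf_object__load: %d\n", err);
		return 1;
	}

	prog = bpf_object__find_program_by_name(obj, "xdp_prog");
	if (!prog) {
		fprintf(stderr, "program 'xdp_prog' not found\n");
		return 1;
	}

	/* XDP_FLAGS_DRV_MODE forces the driver implementation rather than
	 * generic XDP, so failures here point at the igb_xdp_setup() path.
	 */
	err = bpf_xdp_attach(ifindex, bpf_program__fd(prog),
			     XDP_FLAGS_DRV_MODE, NULL);
	if (err) {
		fprintf(stderr, "bpf_xdp_attach: %d\n", err);
		return 1;
	}

	printf("XDP program attached to %s (ifindex %u)\n", ifname, ifindex);
	return 0;
}

Detaching again (bpf_xdp_detach() from libbpf, or "ip link set dev eth0 xdp off") goes through the same igb_xdp_setup() need_reset handling, which closes and reopens the interface when a program is added or removed while the device is running.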