From 6778948f9de86c3cfaf36725a7c87dcff9ba247f Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Mon, 11 Dec 2023 08:20:59 +0000 Subject: [PATCH] kernel_5.10 no rt --- kernel/drivers/net/ethernet/intel/i40e/i40e_main.c | 2716 ++++++++++++++++++++++++++++++++++++++++++++-------------- 1 files changed, 2,031 insertions(+), 685 deletions(-) diff --git a/kernel/drivers/net/ethernet/intel/i40e/i40e_main.c b/kernel/drivers/net/ethernet/intel/i40e/i40e_main.c index 21ea0cd..2c60d2a 100644 --- a/kernel/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/kernel/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5,11 +5,14 @@ #include <linux/of_net.h> #include <linux/pci.h> #include <linux/bpf.h> +#include <generated/utsrelease.h> /* Local includes */ #include "i40e.h" #include "i40e_diag.h" +#include "i40e_xsk.h" #include <net/udp_tunnel.h> +#include <net/xdp_sock_drv.h> /* All i40e tracepoints are defined by the include below, which * must be included exactly once across the whole kernel with * CREATE_TRACE_POINTS defined @@ -21,23 +24,14 @@ static const char i40e_driver_string[] = "Intel(R) Ethernet Connection XL710 Network Driver"; -#define DRV_KERN "-k" - -#define DRV_VERSION_MAJOR 2 -#define DRV_VERSION_MINOR 3 -#define DRV_VERSION_BUILD 2 -#define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ - __stringify(DRV_VERSION_MINOR) "." \ - __stringify(DRV_VERSION_BUILD) DRV_KERN -const char i40e_driver_version_str[] = DRV_VERSION; -static const char i40e_copyright[] = "Copyright (c) 2013 - 2014 Intel Corporation."; +static const char i40e_copyright[] = "Copyright (c) 2013 - 2019 Intel Corporation."; /* a bit of forward declarations */ static void i40e_vsi_reinit_locked(struct i40e_vsi *vsi); static void i40e_handle_reset_warning(struct i40e_pf *pf, bool lock_acquired); static int i40e_add_vsi(struct i40e_vsi *vsi); static int i40e_add_veb(struct i40e_veb *veb, struct i40e_vsi *vsi); -static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit); +static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit, bool lock_acquired); static int i40e_setup_misc_vector(struct i40e_pf *pf); static void i40e_determine_queue_usage(struct i40e_pf *pf); static int i40e_setup_pf_filter_control(struct i40e_pf *pf); @@ -46,11 +40,15 @@ bool lock_acquired); static int i40e_reset(struct i40e_pf *pf); static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired); +static int i40e_setup_misc_vector_for_recovery_mode(struct i40e_pf *pf); +static int i40e_restore_interrupt_scheme(struct i40e_pf *pf); +static bool i40e_check_recovery_mode(struct i40e_pf *pf); +static int i40e_init_recovery_mode(struct i40e_pf *pf, struct i40e_hw *hw); static void i40e_fdir_sb_setup(struct i40e_pf *pf); static int i40e_veb_get_bw_info(struct i40e_veb *veb); static int i40e_get_capabilities(struct i40e_pf *pf, enum i40e_admin_queue_opc list_type); - +static bool i40e_is_total_port_shutdown_enabled(struct i40e_pf *pf); /* i40e_pci_tbl - PCI Device ID Table * @@ -69,6 +67,9 @@ {PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_C), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T4), 0}, + {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T_BC), 0}, + {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_SFP), 0}, + {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_B), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_KX_X722), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_X722), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_X722), 0}, @@ -77,6 +78,8 @@ {PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_I_X722), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2_A), 0}, + {PCI_VDEVICE(INTEL, I40E_DEV_ID_X710_N3000), 0}, + {PCI_VDEVICE(INTEL, I40E_DEV_ID_XXV710_N3000), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_25G_B), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_25G_SFP28), 0}, /* required last entry */ @@ -91,8 +94,7 @@ MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>"); MODULE_DESCRIPTION("Intel(R) Ethernet Connection XL710 Network Driver"); -MODULE_LICENSE("GPL"); -MODULE_VERSION(DRV_VERSION); +MODULE_LICENSE("GPL v2"); static struct workqueue_struct *i40e_wq; @@ -127,8 +129,8 @@ struct i40e_pf *pf = (struct i40e_pf *)hw->back; mem->size = ALIGN(size, alignment); - mem->va = dma_zalloc_coherent(&pf->pdev->dev, mem->size, - &mem->pa, GFP_KERNEL); + mem->va = dma_alloc_coherent(&pf->pdev->dev, mem->size, &mem->pa, + GFP_KERNEL); if (!mem->va) return -ENOMEM; @@ -302,56 +304,39 @@ **/ void i40e_service_event_schedule(struct i40e_pf *pf) { - if (!test_bit(__I40E_DOWN, pf->state) && - !test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) + if ((!test_bit(__I40E_DOWN, pf->state) && + !test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) || + test_bit(__I40E_RECOVERY_MODE, pf->state)) queue_work(i40e_wq, &pf->service_task); } /** * i40e_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure + * @txqueue: queue number timing out * * If any port has noticed a Tx timeout, it is likely that the whole * device is munged, not just the one netdev port, so go for the full * reset. **/ -static void i40e_tx_timeout(struct net_device *netdev) +static void i40e_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; struct i40e_ring *tx_ring = NULL; - unsigned int i, hung_queue = 0; + unsigned int i; u32 head, val; pf->tx_timeout_count++; - /* find the stopped queue the same way the stack does */ - for (i = 0; i < netdev->num_tx_queues; i++) { - struct netdev_queue *q; - unsigned long trans_start; - - q = netdev_get_tx_queue(netdev, i); - trans_start = q->trans_start; - if (netif_xmit_stopped(q) && - time_after(jiffies, - (trans_start + netdev->watchdog_timeo))) { - hung_queue = i; - break; - } - } - - if (i == netdev->num_tx_queues) { - netdev_info(netdev, "tx_timeout: no netdev hung queue found\n"); - } else { - /* now that we have an index, find the tx_ring struct */ - for (i = 0; i < vsi->num_queue_pairs; i++) { - if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) { - if (hung_queue == - vsi->tx_rings[i]->queue_index) { - tx_ring = vsi->tx_rings[i]; - break; - } + /* with txqueue index, find the tx_ring struct */ + for (i = 0; i < vsi->num_queue_pairs; i++) { + if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) { + if (txqueue == + vsi->tx_rings[i]->queue_index) { + tx_ring = vsi->tx_rings[i]; + break; } } } @@ -377,14 +362,14 @@ val = rd32(&pf->hw, I40E_PFINT_DYN_CTL0); netdev_info(netdev, "tx_timeout: VSI_seid: %d, Q %d, NTC: 0x%x, HWB: 0x%x, NTU: 0x%x, TAIL: 0x%x, INT: 0x%x\n", - vsi->seid, hung_queue, tx_ring->next_to_clean, + vsi->seid, txqueue, tx_ring->next_to_clean, head, tx_ring->next_to_use, readl(tx_ring->tail), val); } pf->tx_timeout_last_recovery = jiffies; - netdev_info(netdev, "tx_timeout recovery level %d, hung_queue %d\n", - pf->tx_timeout_recovery_level, hung_queue); + netdev_info(netdev, "tx_timeout recovery level %d, txqueue %d\n", + pf->tx_timeout_recovery_level, txqueue); switch (pf->tx_timeout_recovery_level) { case 1: @@ -397,7 +382,9 @@ set_bit(__I40E_GLOBAL_RESET_REQUESTED, pf->state); break; default: - netdev_err(netdev, "tx_timeout recovery unsuccessful\n"); + netdev_err(netdev, "tx_timeout recovery unsuccessful, device is in non-recoverable state.\n"); + set_bit(__I40E_DOWN_REQUESTED, pf->state); + set_bit(__I40E_VSI_DOWN_REQUESTED, vsi->state); break; } @@ -553,10 +540,55 @@ sizeof(pf->veb[i]->stats)); memset(&pf->veb[i]->stats_offsets, 0, sizeof(pf->veb[i]->stats_offsets)); + memset(&pf->veb[i]->tc_stats, 0, + sizeof(pf->veb[i]->tc_stats)); + memset(&pf->veb[i]->tc_stats_offsets, 0, + sizeof(pf->veb[i]->tc_stats_offsets)); pf->veb[i]->stat_offsets_loaded = false; } } pf->hw_csum_rx_error = 0; +} + +/** + * i40e_compute_pci_to_hw_id - compute index form PCI function. + * @vsi: ptr to the VSI to read from. + * @hw: ptr to the hardware info. + **/ +static u32 i40e_compute_pci_to_hw_id(struct i40e_vsi *vsi, struct i40e_hw *hw) +{ + int pf_count = i40e_get_pf_count(hw); + + if (vsi->type == I40E_VSI_SRIOV) + return (hw->port * BIT(7)) / pf_count + vsi->vf_id; + + return hw->port + BIT(7); +} + +/** + * i40e_stat_update64 - read and update a 64 bit stat from the chip. + * @hw: ptr to the hardware info. + * @hireg: the high 32 bit reg to read. + * @loreg: the low 32 bit reg to read. + * @offset_loaded: has the initial offset been loaded yet. + * @offset: ptr to current offset value. + * @stat: ptr to the stat. + * + * Since the device stats are not reset at PFReset, they will not + * be zeroed when the driver starts. We'll save the first values read + * and use them as offsets to be subtracted from the raw values in order + * to report stats that count from zero. + **/ +static void i40e_stat_update64(struct i40e_hw *hw, u32 hireg, u32 loreg, + bool offset_loaded, u64 *offset, u64 *stat) +{ + u64 new_data; + + new_data = rd64(hw, loreg); + + if (!offset_loaded || new_data < *offset) + *offset = new_data; + *stat = new_data - *offset; } /** @@ -631,6 +663,34 @@ } /** + * i40e_stats_update_rx_discards - update rx_discards. + * @vsi: ptr to the VSI to be updated. + * @hw: ptr to the hardware info. + * @stat_idx: VSI's stat_counter_idx. + * @offset_loaded: ptr to the VSI's stat_offsets_loaded. + * @stat_offset: ptr to stat_offset to store first read of specific register. + * @stat: ptr to VSI's stat to be updated. + **/ +static void +i40e_stats_update_rx_discards(struct i40e_vsi *vsi, struct i40e_hw *hw, + int stat_idx, bool offset_loaded, + struct i40e_eth_stats *stat_offset, + struct i40e_eth_stats *stat) +{ + u64 rx_rdpc, rx_rxerr; + + i40e_stat_update32(hw, I40E_GLV_RDPC(stat_idx), offset_loaded, + &stat_offset->rx_discards, &rx_rdpc); + i40e_stat_update64(hw, + I40E_GL_RXERR1H(i40e_compute_pci_to_hw_id(vsi, hw)), + I40E_GL_RXERR1L(i40e_compute_pci_to_hw_id(vsi, hw)), + offset_loaded, &stat_offset->rx_discards_other, + &rx_rxerr); + + stat->rx_discards = rx_rdpc + rx_rxerr; +} + +/** * i40e_update_eth_stats - Update VSI-specific ethernet statistics counters. * @vsi: the VSI to be updated **/ @@ -655,9 +715,6 @@ i40e_stat_update32(hw, I40E_GLV_RUPP(stat_idx), vsi->stat_offsets_loaded, &oes->rx_unknown_protocol, &es->rx_unknown_protocol); - i40e_stat_update32(hw, I40E_GLV_TEPC(stat_idx), - vsi->stat_offsets_loaded, - &oes->tx_errors, &es->tx_errors); i40e_stat_update48(hw, I40E_GLV_GORCH(stat_idx), I40E_GLV_GORCL(stat_idx), @@ -692,6 +749,10 @@ I40E_GLV_BPTCL(stat_idx), vsi->stat_offsets_loaded, &oes->tx_broadcast, &es->tx_broadcast); + + i40e_stats_update_rx_discards(vsi, hw, stat_idx, + vsi->stat_offsets_loaded, oes, es); + vsi->stat_offsets_loaded = true; } @@ -699,7 +760,7 @@ * i40e_update_veb_stats - Update Switch component statistics * @veb: the VEB being updated **/ -static void i40e_update_veb_stats(struct i40e_veb *veb) +void i40e_update_veb_stats(struct i40e_veb *veb) { struct i40e_pf *pf = veb->pf; struct i40e_hw *hw = &pf->hw; @@ -852,6 +913,25 @@ rx_p += packets; rx_buf += p->rx_stats.alloc_buff_failed; rx_page += p->rx_stats.alloc_page_failed; + + if (i40e_enabled_xdp_vsi(vsi)) { + /* locate XDP ring */ + p = READ_ONCE(vsi->xdp_rings[q]); + if (!p) + continue; + + do { + start = u64_stats_fetch_begin_irq(&p->syncp); + packets = p->stats.packets; + bytes = p->stats.bytes; + } while (u64_stats_fetch_retry_irq(&p->syncp, start)); + tx_b += bytes; + tx_p += packets; + tx_restart += p->tx_stats.restart_queue; + tx_busy += p->tx_stats.tx_busy; + tx_linearize += p->tx_stats.tx_linearize; + tx_force_wb += p->tx_stats.tx_force_wb; + } } rcu_read_unlock(); vsi->tx_restart = tx_restart; @@ -1129,6 +1209,25 @@ i40e_update_pf_stats(pf); i40e_update_vsi_stats(vsi); +} + +/** + * i40e_count_filters - counts VSI mac filters + * @vsi: the VSI to be searched + * + * Returns count of mac filters + **/ +int i40e_count_filters(struct i40e_vsi *vsi) +{ + struct i40e_mac_filter *f; + struct hlist_node *h; + int bkt; + int cnt = 0; + + hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) + ++cnt; + + return cnt; } /** @@ -1530,8 +1629,7 @@ bool found = false; int bkt; - WARN(!spin_is_locked(&vsi->mac_filter_hash_lock), - "Missing mac_filter_hash_lock\n"); + lockdep_assert_held(&vsi->mac_filter_hash_lock); hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { if (ether_addr_equal(macaddr, f->macaddr)) { __i40e_del_filter(vsi, f); @@ -1569,8 +1667,8 @@ return 0; } - if (test_bit(__I40E_VSI_DOWN, vsi->back->state) || - test_bit(__I40E_RESET_RECOVERY_PENDING, vsi->back->state)) + if (test_bit(__I40E_DOWN, pf->state) || + test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) return -EADDRNOTAVAIL; if (ether_addr_equal(hw->mac.addr, addr->sa_data)) @@ -1594,8 +1692,7 @@ if (vsi->type == I40E_VSI_MAIN) { i40e_status ret; - ret = i40e_aq_mac_address_write(&vsi->back->hw, - I40E_AQC_WRITE_TYPE_LAA_WOL, + ret = i40e_aq_mac_address_write(hw, I40E_AQC_WRITE_TYPE_LAA_WOL, addr->sa_data, NULL); if (ret) netdev_info(netdev, "Ignoring error from firmware on LAA update, status %s, AQ ret %s\n", @@ -1606,7 +1703,7 @@ /* schedule our worker thread which will take care of * applying the new filter changes */ - i40e_service_event_schedule(vsi->back); + i40e_service_event_schedule(pf); return 0; } @@ -1614,6 +1711,8 @@ * i40e_config_rss_aq - Prepare for RSS using AQ commands * @vsi: vsi structure * @seed: RSS hash seed + * @lut: pointer to lookup table of lut_size + * @lut_size: size of the lookup table **/ static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed, u8 *lut, u16 lut_size) @@ -1635,7 +1734,7 @@ } } if (lut) { - bool pf_lut = vsi->type == I40E_VSI_MAIN ? true : false; + bool pf_lut = vsi->type == I40E_VSI_MAIN; ret = i40e_aq_set_rss_lut(hw, vsi->id, pf_lut, lut, lut_size); if (ret) { @@ -1800,6 +1899,8 @@ sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID; offset = 0; + /* zero out queue mapping, it will get updated on the end of the function */ + memset(ctxt->info.queue_mapping, 0, sizeof(ctxt->info.queue_mapping)); if (vsi->type == I40E_VSI_MAIN) { /* This code helps add more queue to the VSI if we have @@ -1808,18 +1909,24 @@ * non-zero req_queue_pairs says that user requested a new * queue count via ethtool's set_channels, so use this * value for queues distribution across traffic classes + * We need at least one queue pair for the interface + * to be usable as we see in else statement. */ if (vsi->req_queue_pairs > 0) vsi->num_queue_pairs = vsi->req_queue_pairs; else if (pf->flags & I40E_FLAG_MSIX_ENABLED) vsi->num_queue_pairs = pf->num_lan_msix; + else + vsi->num_queue_pairs = 1; } /* Number of queues per enabled TC */ - if (vsi->type == I40E_VSI_MAIN) + if (vsi->type == I40E_VSI_MAIN || + (vsi->type == I40E_VSI_SRIOV && vsi->num_queue_pairs != 0)) num_tc_qps = vsi->num_queue_pairs; else num_tc_qps = vsi->alloc_queue_pairs; + if (enabled_tc && (vsi->back->flags & I40E_FLAG_DCB_ENABLED)) { /* Find numtc from enabled TC bitmap */ for (i = 0, numtc = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { @@ -1858,7 +1965,7 @@ num_tc_qps); break; } - /* fall through */ + fallthrough; case I40E_VSI_FDIR: case I40E_VSI_SRIOV: case I40E_VSI_VMDQ2: @@ -1897,10 +2004,12 @@ } ctxt->info.tc_mapping[i] = cpu_to_le16(qmap); } - /* Do not change previously set num_queue_pairs for PFs */ + /* Do not change previously set num_queue_pairs for PFs and VFs*/ if ((vsi->type == I40E_VSI_MAIN && numtc != 1) || - vsi->type != I40E_VSI_MAIN) + (vsi->type == I40E_VSI_SRIOV && vsi->num_queue_pairs == 0) || + (vsi->type != I40E_VSI_MAIN && vsi->type != I40E_VSI_SRIOV)) vsi->num_queue_pairs = offset; + /* Scheduler section valid can only be set for ADD VSI */ if (is_add) { sections |= I40E_AQ_VSI_PROP_SCHED_VALID; @@ -2154,11 +2263,22 @@ fcnt = i40e_update_filter_state(num_add, list, add_head); if (fcnt != num_add) { - set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); - dev_warn(&vsi->back->pdev->dev, - "Error %s adding RX filters on %s, promiscuous mode forced on\n", - i40e_aq_str(hw, aq_err), - vsi_name); + if (vsi->type == I40E_VSI_MAIN) { + set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); + dev_warn(&vsi->back->pdev->dev, + "Error %s adding RX filters on %s, promiscuous mode forced on\n", + i40e_aq_str(hw, aq_err), vsi_name); + } else if (vsi->type == I40E_VSI_SRIOV || + vsi->type == I40E_VSI_VMDQ1 || + vsi->type == I40E_VSI_VMDQ2) { + dev_warn(&vsi->back->pdev->dev, + "Error %s adding RX filters on %s, please set promiscuous on manually for %s\n", + i40e_aq_str(hw, aq_err), vsi_name, vsi_name); + } else { + dev_warn(&vsi->back->pdev->dev, + "Error %s adding RX filters on %s, incorrect VSI type: %i.\n", + i40e_aq_str(hw, aq_err), vsi_name, vsi->type); + } } } @@ -2565,6 +2685,9 @@ vsi_name, i40e_stat_str(hw, aq_ret), i40e_aq_str(hw, hw->aq.asq_last_status)); + } else { + dev_info(&pf->pdev->dev, "%s allmulti mode.\n", + cur_multipromisc ? "entering" : "leaving"); } } @@ -2671,8 +2794,8 @@ return -EINVAL; } - netdev_info(netdev, "changing MTU from %d to %d\n", - netdev->mtu, new_mtu); + netdev_dbg(netdev, "changing MTU from %d to %d\n", + netdev->mtu, new_mtu); netdev->mtu = new_mtu; if (netif_running(netdev)) i40e_vsi_reinit_locked(vsi); @@ -3014,9 +3137,9 @@ **/ void i40e_vsi_remove_pvid(struct i40e_vsi *vsi) { - i40e_vlan_stripping_disable(vsi); - vsi->info.pvid = 0; + + i40e_vlan_stripping_disable(vsi); } /** @@ -3129,6 +3252,26 @@ } /** + * i40e_xsk_pool - Retrieve the AF_XDP buffer pool if XDP and ZC is enabled + * @ring: The Tx or Rx ring + * + * Returns the AF_XDP buffer pool or NULL. + **/ +static struct xsk_buff_pool *i40e_xsk_pool(struct i40e_ring *ring) +{ + bool xdp_on = i40e_enabled_xdp_vsi(ring->vsi); + int qid = ring->queue_index; + + if (ring_is_xdp(ring)) + qid -= ring->vsi->alloc_queue_pairs; + + if (!xdp_on || !test_bit(qid, ring->vsi->af_xdp_zc_qps)) + return NULL; + + return xsk_get_pool_from_qid(ring->vsi->netdev, qid); +} + +/** * i40e_configure_tx_ring - Configure a transmit ring context and rest * @ring: The Tx ring to configure * @@ -3142,6 +3285,9 @@ struct i40e_hmc_obj_txq tx_ctx; i40e_status err = 0; u32 qtx_ctl = 0; + + if (ring_is_xdp(ring)) + ring->xsk_pool = i40e_xsk_pool(ring); /* some ATR related tx ring init */ if (vsi->back->flags & I40E_FLAG_FD_ATR_ENABLED) { @@ -3252,13 +3398,45 @@ struct i40e_hw *hw = &vsi->back->hw; struct i40e_hmc_obj_rxq rx_ctx; i40e_status err = 0; + bool ok; + int ret; bitmap_zero(ring->state, __I40E_RING_STATE_NBITS); /* clear the context structure first */ memset(&rx_ctx, 0, sizeof(rx_ctx)); - ring->rx_buf_len = vsi->rx_buf_len; + if (ring->vsi->type == I40E_VSI_MAIN) + xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq); + + ring->xsk_pool = i40e_xsk_pool(ring); + if (ring->xsk_pool) { + ring->rx_buf_len = + xsk_pool_get_rx_frame_size(ring->xsk_pool); + /* For AF_XDP ZC, we disallow packets to span on + * multiple buffers, thus letting us skip that + * handling in the fast-path. + */ + chain_len = 1; + ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, + MEM_TYPE_XSK_BUFF_POOL, + NULL); + if (ret) + return ret; + dev_info(&vsi->back->pdev->dev, + "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n", + ring->queue_index); + + } else { + ring->rx_buf_len = vsi->rx_buf_len; + if (ring->vsi->type == I40E_VSI_MAIN) { + ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, + MEM_TYPE_PAGE_SHARED, + NULL); + if (ret) + return ret; + } + } rx_ctx.dbuff = DIV_ROUND_UP(ring->rx_buf_len, BIT_ULL(I40E_RXQ_CTX_DBUFF_SHIFT)); @@ -3266,8 +3444,8 @@ rx_ctx.base = (ring->dma / 128); rx_ctx.qlen = ring->count; - /* use 32 byte descriptors */ - rx_ctx.dsize = 1; + /* use 16 byte descriptors */ + rx_ctx.dsize = 0; /* descriptor type is always zero * rx_ctx.dtype = 0; @@ -3314,7 +3492,21 @@ ring->tail = hw->hw_addr + I40E_QRX_TAIL(pf_q); writel(0, ring->tail); - i40e_alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring)); + if (ring->xsk_pool) { + xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq); + ok = i40e_alloc_rx_buffers_zc(ring, I40E_DESC_UNUSED(ring)); + } else { + ok = !i40e_alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring)); + } + if (!ok) { + /* Log this in case the user has forgotten to give the kernel + * any buffers, even later in the application. + */ + dev_info(&vsi->back->pdev->dev, + "Failed to allocate some buffers on %sRx ring %d (pf_q %d)\n", + ring->xsk_pool ? "AF_XDP ZC enabled " : "", + ring->queue_index, pf_q); + } return 0; } @@ -3333,7 +3525,7 @@ for (i = 0; (i < vsi->num_queue_pairs) && !err; i++) err = i40e_configure_tx_ring(vsi->tx_rings[i]); - if (!i40e_enabled_xdp_vsi(vsi)) + if (err || !i40e_enabled_xdp_vsi(vsi)) return err; for (i = 0; (i < vsi->num_queue_pairs) && !err; i++) @@ -3631,7 +3823,7 @@ (I40E_QUEUE_TYPE_TX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT); - wr32(hw, I40E_QINT_TQCTL(nextqp), val); + wr32(hw, I40E_QINT_TQCTL(nextqp), val); } val = I40E_QINT_TQCTL_CAUSE_ENA_MASK | @@ -4006,7 +4198,8 @@ enable_intr: /* re-enable interrupt causes */ wr32(hw, I40E_PFINT_ICR0_ENA, ena_mask); - if (!test_bit(__I40E_DOWN, pf->state)) { + if (!test_bit(__I40E_DOWN, pf->state) || + test_bit(__I40E_RECOVERY_MODE, pf->state)) { i40e_service_event_schedule(pf); i40e_irq_dynamic_enable_icr0(pf); } @@ -5321,6 +5514,58 @@ } /** + * i40e_update_adq_vsi_queues - update queue mapping for ADq VSI + * @vsi: the VSI being reconfigured + * @vsi_offset: offset from main VF VSI + */ +int i40e_update_adq_vsi_queues(struct i40e_vsi *vsi, int vsi_offset) +{ + struct i40e_vsi_context ctxt = {}; + struct i40e_pf *pf; + struct i40e_hw *hw; + int ret; + + if (!vsi) + return I40E_ERR_PARAM; + pf = vsi->back; + hw = &pf->hw; + + ctxt.seid = vsi->seid; + ctxt.pf_num = hw->pf_id; + ctxt.vf_num = vsi->vf_id + hw->func_caps.vf_base_id + vsi_offset; + ctxt.uplink_seid = vsi->uplink_seid; + ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL; + ctxt.flags = I40E_AQ_VSI_TYPE_VF; + ctxt.info = vsi->info; + + i40e_vsi_setup_queue_map(vsi, &ctxt, vsi->tc_config.enabled_tc, + false); + if (vsi->reconfig_rss) { + vsi->rss_size = min_t(int, pf->alloc_rss_size, + vsi->num_queue_pairs); + ret = i40e_vsi_config_rss(vsi); + if (ret) { + dev_info(&pf->pdev->dev, "Failed to reconfig rss for num_queues\n"); + return ret; + } + vsi->reconfig_rss = false; + } + + ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); + if (ret) { + dev_info(&pf->pdev->dev, "Update vsi config failed, err %s aq_err %s\n", + i40e_stat_str(hw, ret), + i40e_aq_str(hw, hw->aq.asq_last_status)); + return ret; + } + /* update the local VSI info with updated queue map */ + i40e_vsi_update_queue_map(vsi, &ctxt); + vsi->info.valid_sections = 0; + + return ret; +} + +/** * i40e_vsi_config_tc - Configure VSI Tx Scheduler for given TC map * @vsi: VSI to be configured * @enabled_tc: TC bitmap @@ -5482,6 +5727,26 @@ } /** + * i40e_bw_bytes_to_mbits - Convert max_tx_rate from bytes to mbits + * @vsi: Pointer to vsi structure + * @max_tx_rate: max TX rate in bytes to be converted into Mbits + * + * Helper function to convert units before send to set BW limit + **/ +static u64 i40e_bw_bytes_to_mbits(struct i40e_vsi *vsi, u64 max_tx_rate) +{ + if (max_tx_rate < I40E_BW_MBPS_DIVISOR) { + dev_warn(&vsi->back->pdev->dev, + "Setting max tx rate to minimum usable value of 50Mbps.\n"); + max_tx_rate = I40E_BW_CREDIT_DIVISOR; + } else { + do_div(max_tx_rate, I40E_BW_MBPS_DIVISOR); + } + + return max_tx_rate; +} + +/** * i40e_set_bw_limit - setup BW limit for Tx traffic based on max_tx_rate * @vsi: VSI to be configured * @seid: seid of the channel/VSI @@ -5503,10 +5768,10 @@ max_tx_rate, seid); return -EINVAL; } - if (max_tx_rate && max_tx_rate < 50) { + if (max_tx_rate && max_tx_rate < I40E_BW_CREDIT_DIVISOR) { dev_warn(&pf->pdev->dev, "Setting max tx rate to minimum usable value of 50Mbps.\n"); - max_tx_rate = 50; + max_tx_rate = I40E_BW_CREDIT_DIVISOR; } /* Tx rate credits are in values of 50Mbps, 0 is disabled */ @@ -5608,24 +5873,6 @@ kfree(ch); } INIT_LIST_HEAD(&vsi->ch_list); -} - -/** - * i40e_is_any_channel - channel exist or not - * @vsi: ptr to VSI to which channels are associated with - * - * Returns true or false if channel(s) exist for associated VSI or not - **/ -static bool i40e_is_any_channel(struct i40e_vsi *vsi) -{ - struct i40e_channel *ch, *ch_tmp; - - list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) { - if (ch->initialized) - return true; - } - - return false; } /** @@ -5764,7 +6011,6 @@ /** * i40e_channel_setup_queue_map - Setup a channel queue map * @pf: ptr to PF device - * @vsi: the VSI being setup * @ctxt: VSI context structure * @ch: ptr to channel structure * @@ -5852,11 +6098,13 @@ return -ENOENT; } - /* Success, update channel */ - ch->enabled_tc = enabled_tc; + /* Success, update channel, set enabled_tc only if the channel + * is not a macvlan + */ + ch->enabled_tc = !i40e_is_channel_macvlan(ch) && enabled_tc; ch->seid = ctxt.seid; ch->vsi_number = ctxt.vsi_number; - ch->stat_counter_idx = cpu_to_le16(ctxt.info.stat_counter_idx); + ch->stat_counter_idx = le16_to_cpu(ctxt.info.stat_counter_idx); /* copy just the sections touched not the entire info * since not all sections are valid as returned by @@ -6005,8 +6253,7 @@ /** * i40e_setup_channel - setup new channel using uplink element * @pf: ptr to PF device - * @type: type of channel to be created (VMDq2/VF) - * @uplink_seid: underlying HW switching element (VEB) ID + * @vsi: pointer to the VSI to set up the channel within * @ch: ptr to channel structure * * Setup new channel (VSI) based on specified type (VMDq2/VF) @@ -6133,26 +6380,15 @@ /* By default we are in VEPA mode, if this is the first VF/VMDq * VSI to be added switch to VEB mode. */ - if ((!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) || - (!i40e_is_any_channel(vsi))) { - if (!is_power_of_2(vsi->tc_config.tc_info[0].qcount)) { - dev_dbg(&pf->pdev->dev, - "Failed to create channel. Override queues (%u) not power of 2\n", - vsi->tc_config.tc_info[0].qcount); - return -EINVAL; - } - if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) { - pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; + if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) { + pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; - if (vsi->type == I40E_VSI_MAIN) { - if (pf->flags & I40E_FLAG_TC_MQPRIO) - i40e_do_reset(pf, I40E_PF_RESET_FLAG, - true); - else - i40e_do_reset_safe(pf, - I40E_PF_RESET_FLAG); - } + if (vsi->type == I40E_VSI_MAIN) { + if (pf->flags & I40E_FLAG_TC_MQPRIO) + i40e_do_reset(pf, I40E_PF_RESET_FLAG, true); + else + i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG); } /* now onwards for main VSI, number of queues will be value * of TC0's queue count @@ -6416,11 +6652,13 @@ * Also do not enable DCBx if FW LLDP agent is disabled */ if ((pf->hw_features & I40E_HW_NO_DCB_SUPPORT) || - (pf->flags & I40E_FLAG_DISABLE_FW_LLDP)) + (pf->flags & I40E_FLAG_DISABLE_FW_LLDP)) { + dev_info(&pf->pdev->dev, "DCB is not supported or FW LLDP is disabled\n"); + err = I40E_NOT_SUPPORTED; goto out; + } - /* Get the initial DCB configuration */ - err = i40e_init_dcb(hw); + err = i40e_init_dcb(hw, true); if (!err) { /* Device/Function is not DCBX capable */ if ((!hw->func_caps.dcb) || @@ -6457,8 +6695,7 @@ return err; } #endif /* CONFIG_I40E_DCB */ -#define SPEED_SIZE 14 -#define FC_SIZE 8 + /** * i40e_print_link_message - print link up or down * @vsi: the VSI for which link needs a message @@ -6474,7 +6711,10 @@ char *req_fec = ""; char *an = ""; - new_speed = pf->hw.phy.link_info.link_speed; + if (isup) + new_speed = pf->hw.phy.link_info.link_speed; + else + new_speed = I40E_LINK_SPEED_UNKNOWN; if ((vsi->current_isup == isup) && (vsi->current_speed == new_speed)) return; @@ -6507,6 +6747,12 @@ case I40E_LINK_SPEED_10GB: speed = "10 G"; break; + case I40E_LINK_SPEED_5GB: + speed = "5 G"; + break; + case I40E_LINK_SPEED_2_5GB: + speed = "2.5 G"; + break; case I40E_LINK_SPEED_1GB: speed = "1000 M"; break; @@ -6533,19 +6779,19 @@ } if (pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_25GB) { - req_fec = ", Requested FEC: None"; - fec = ", FEC: None"; - an = ", Autoneg: False"; + req_fec = "None"; + fec = "None"; + an = "False"; if (pf->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED) - an = ", Autoneg: True"; + an = "True"; if (pf->hw.phy.link_info.fec_info & I40E_AQ_CONFIG_FEC_KR_ENA) - fec = ", FEC: CL74 FC-FEC/BASE-R"; + fec = "CL74 FC-FEC/BASE-R"; else if (pf->hw.phy.link_info.fec_info & I40E_AQ_CONFIG_FEC_RS_ENA) - fec = ", FEC: CL108 RS-FEC"; + fec = "CL108 RS-FEC"; /* 'CL108 RS-FEC' should be displayed when RS is requested, or * both RS and FC are requested @@ -6554,14 +6800,38 @@ (I40E_AQ_REQUEST_FEC_KR | I40E_AQ_REQUEST_FEC_RS)) { if (vsi->back->hw.phy.link_info.req_fec_info & I40E_AQ_REQUEST_FEC_RS) - req_fec = ", Requested FEC: CL108 RS-FEC"; + req_fec = "CL108 RS-FEC"; else - req_fec = ", Requested FEC: CL74 FC-FEC/BASE-R"; + req_fec = "CL74 FC-FEC/BASE-R"; } + netdev_info(vsi->netdev, + "NIC Link is Up, %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n", + speed, req_fec, fec, an, fc); + } else if (pf->hw.device_id == I40E_DEV_ID_KX_X722) { + req_fec = "None"; + fec = "None"; + an = "False"; + + if (pf->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED) + an = "True"; + + if (pf->hw.phy.link_info.fec_info & + I40E_AQ_CONFIG_FEC_KR_ENA) + fec = "CL74 FC-FEC/BASE-R"; + + if (pf->hw.phy.link_info.req_fec_info & + I40E_AQ_REQUEST_FEC_KR) + req_fec = "CL74 FC-FEC/BASE-R"; + + netdev_info(vsi->netdev, + "NIC Link is Up, %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n", + speed, req_fec, fec, an, fc); + } else { + netdev_info(vsi->netdev, + "NIC Link is Up, %sbps Full Duplex, Flow Control: %s\n", + speed, fc); } - netdev_info(vsi->netdev, "NIC Link is Up, %sbps Full Duplex%s%s%s, Flow Control: %s\n", - speed, req_fec, fec, an, fc); } /** @@ -6622,28 +6892,12 @@ { struct i40e_pf *pf = vsi->back; - WARN_ON(in_interrupt()); while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state)) usleep_range(1000, 2000); i40e_down(vsi); i40e_up(vsi); clear_bit(__I40E_CONFIG_BUSY, pf->state); -} - -/** - * i40e_up - Bring the connection back up after being down - * @vsi: the VSI being configured - **/ -int i40e_up(struct i40e_vsi *vsi) -{ - int err; - - err = i40e_vsi_configure(vsi); - if (!err) - err = i40e_up_complete(vsi); - - return err; } /** @@ -6655,6 +6909,7 @@ { struct i40e_aq_get_phy_abilities_resp abilities; struct i40e_aq_set_phy_config config = {0}; + bool non_zero_phy_type = is_up; struct i40e_hw *hw = &pf->hw; i40e_status err; u64 mask; @@ -6690,8 +6945,11 @@ /* If link needs to go up, but was not forced to go down, * and its speed values are OK, no need for a flap + * if non_zero_phy_type was set, still need to force up */ - if (is_up && abilities.phy_type != 0 && abilities.link_speed != 0) + if (pf->flags & I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED) + non_zero_phy_type = true; + else if (is_up && abilities.phy_type != 0 && abilities.link_speed != 0) return I40E_SUCCESS; /* To force link we need to set bits for all supported PHY types, @@ -6699,10 +6957,18 @@ * across two fields. */ mask = I40E_PHY_TYPES_BITMASK; - config.phy_type = is_up ? cpu_to_le32((u32)(mask & 0xffffffff)) : 0; - config.phy_type_ext = is_up ? (u8)((mask >> 32) & 0xff) : 0; + config.phy_type = + non_zero_phy_type ? cpu_to_le32((u32)(mask & 0xffffffff)) : 0; + config.phy_type_ext = + non_zero_phy_type ? (u8)((mask >> 32) & 0xff) : 0; /* Copy the old settings, except of phy_type */ config.abilities = abilities.abilities; + if (pf->flags & I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED) { + if (is_up) + config.abilities |= I40E_AQ_PHY_ENABLE_LINK; + else + config.abilities &= ~(I40E_AQ_PHY_ENABLE_LINK); + } if (abilities.link_speed != 0) config.link_speed = abilities.link_speed; else @@ -6733,9 +6999,29 @@ i40e_update_link_info(hw); } - i40e_aq_set_link_restart_an(hw, true, NULL); + i40e_aq_set_link_restart_an(hw, is_up, NULL); return I40E_SUCCESS; +} + +/** + * i40e_up - Bring the connection back up after being down + * @vsi: the VSI being configured + **/ +int i40e_up(struct i40e_vsi *vsi) +{ + int err; + + if (vsi->type == I40E_VSI_MAIN && + (vsi->back->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED || + vsi->back->flags & I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED)) + i40e_force_link_state(vsi->back, true); + + err = i40e_vsi_configure(vsi); + if (!err) + err = i40e_up_complete(vsi); + + return err; } /** @@ -6756,14 +7042,20 @@ i40e_vsi_disable_irq(vsi); i40e_vsi_stop_rings(vsi); if (vsi->type == I40E_VSI_MAIN && - vsi->back->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED) + (vsi->back->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED || + vsi->back->flags & I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED)) i40e_force_link_state(vsi->back, false); i40e_napi_disable_all(vsi); for (i = 0; i < vsi->num_queue_pairs; i++) { i40e_clean_tx_ring(vsi->tx_rings[i]); - if (i40e_enabled_xdp_vsi(vsi)) + if (i40e_enabled_xdp_vsi(vsi)) { + /* Make sure that in-progress ndo_xdp_xmit and + * ndo_xsk_wakeup calls are completed. + */ + synchronize_rcu(); i40e_clean_tx_ring(vsi->xdp_rings[i]); + } i40e_clean_rx_ring(vsi->rx_rings[i]); } @@ -6841,6 +7133,491 @@ else vsi->tc_config.tc_info[i].qcount = 1; vsi->tc_config.tc_info[i].netdev_tc = 0; + } +} + +/** + * i40e_del_macvlan_filter + * @hw: pointer to the HW structure + * @seid: seid of the channel VSI + * @macaddr: the mac address to apply as a filter + * @aq_err: store the admin Q error + * + * This function deletes a mac filter on the channel VSI which serves as the + * macvlan. Returns 0 on success. + **/ +static i40e_status i40e_del_macvlan_filter(struct i40e_hw *hw, u16 seid, + const u8 *macaddr, int *aq_err) +{ + struct i40e_aqc_remove_macvlan_element_data element; + i40e_status status; + + memset(&element, 0, sizeof(element)); + ether_addr_copy(element.mac_addr, macaddr); + element.vlan_tag = 0; + element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH; + status = i40e_aq_remove_macvlan(hw, seid, &element, 1, NULL); + *aq_err = hw->aq.asq_last_status; + + return status; +} + +/** + * i40e_add_macvlan_filter + * @hw: pointer to the HW structure + * @seid: seid of the channel VSI + * @macaddr: the mac address to apply as a filter + * @aq_err: store the admin Q error + * + * This function adds a mac filter on the channel VSI which serves as the + * macvlan. Returns 0 on success. + **/ +static i40e_status i40e_add_macvlan_filter(struct i40e_hw *hw, u16 seid, + const u8 *macaddr, int *aq_err) +{ + struct i40e_aqc_add_macvlan_element_data element; + i40e_status status; + u16 cmd_flags = 0; + + ether_addr_copy(element.mac_addr, macaddr); + element.vlan_tag = 0; + element.queue_number = 0; + element.match_method = I40E_AQC_MM_ERR_NO_RES; + cmd_flags |= I40E_AQC_MACVLAN_ADD_PERFECT_MATCH; + element.flags = cpu_to_le16(cmd_flags); + status = i40e_aq_add_macvlan(hw, seid, &element, 1, NULL); + *aq_err = hw->aq.asq_last_status; + + return status; +} + +/** + * i40e_reset_ch_rings - Reset the queue contexts in a channel + * @vsi: the VSI we want to access + * @ch: the channel we want to access + */ +static void i40e_reset_ch_rings(struct i40e_vsi *vsi, struct i40e_channel *ch) +{ + struct i40e_ring *tx_ring, *rx_ring; + u16 pf_q; + int i; + + for (i = 0; i < ch->num_queue_pairs; i++) { + pf_q = ch->base_queue + i; + tx_ring = vsi->tx_rings[pf_q]; + tx_ring->ch = NULL; + rx_ring = vsi->rx_rings[pf_q]; + rx_ring->ch = NULL; + } +} + +/** + * i40e_free_macvlan_channels + * @vsi: the VSI we want to access + * + * This function frees the Qs of the channel VSI from + * the stack and also deletes the channel VSIs which + * serve as macvlans. + */ +static void i40e_free_macvlan_channels(struct i40e_vsi *vsi) +{ + struct i40e_channel *ch, *ch_tmp; + int ret; + + if (list_empty(&vsi->macvlan_list)) + return; + + list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) { + struct i40e_vsi *parent_vsi; + + if (i40e_is_channel_macvlan(ch)) { + i40e_reset_ch_rings(vsi, ch); + clear_bit(ch->fwd->bit_no, vsi->fwd_bitmask); + netdev_unbind_sb_channel(vsi->netdev, ch->fwd->netdev); + netdev_set_sb_channel(ch->fwd->netdev, 0); + kfree(ch->fwd); + ch->fwd = NULL; + } + + list_del(&ch->list); + parent_vsi = ch->parent_vsi; + if (!parent_vsi || !ch->initialized) { + kfree(ch); + continue; + } + + /* remove the VSI */ + ret = i40e_aq_delete_element(&vsi->back->hw, ch->seid, + NULL); + if (ret) + dev_err(&vsi->back->pdev->dev, + "unable to remove channel (%d) for parent VSI(%d)\n", + ch->seid, parent_vsi->seid); + kfree(ch); + } + vsi->macvlan_cnt = 0; +} + +/** + * i40e_fwd_ring_up - bring the macvlan device up + * @vsi: the VSI we want to access + * @vdev: macvlan netdevice + * @fwd: the private fwd structure + */ +static int i40e_fwd_ring_up(struct i40e_vsi *vsi, struct net_device *vdev, + struct i40e_fwd_adapter *fwd) +{ + struct i40e_channel *ch = NULL, *ch_tmp, *iter; + int ret = 0, num_tc = 1, i, aq_err; + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + + /* Go through the list and find an available channel */ + list_for_each_entry_safe(iter, ch_tmp, &vsi->macvlan_list, list) { + if (!i40e_is_channel_macvlan(iter)) { + iter->fwd = fwd; + /* record configuration for macvlan interface in vdev */ + for (i = 0; i < num_tc; i++) + netdev_bind_sb_channel_queue(vsi->netdev, vdev, + i, + iter->num_queue_pairs, + iter->base_queue); + for (i = 0; i < iter->num_queue_pairs; i++) { + struct i40e_ring *tx_ring, *rx_ring; + u16 pf_q; + + pf_q = iter->base_queue + i; + + /* Get to TX ring ptr */ + tx_ring = vsi->tx_rings[pf_q]; + tx_ring->ch = iter; + + /* Get the RX ring ptr */ + rx_ring = vsi->rx_rings[pf_q]; + rx_ring->ch = iter; + } + ch = iter; + break; + } + } + + if (!ch) + return -EINVAL; + + /* Guarantee all rings are updated before we update the + * MAC address filter. + */ + wmb(); + + /* Add a mac filter */ + ret = i40e_add_macvlan_filter(hw, ch->seid, vdev->dev_addr, &aq_err); + if (ret) { + /* if we cannot add the MAC rule then disable the offload */ + macvlan_release_l2fw_offload(vdev); + for (i = 0; i < ch->num_queue_pairs; i++) { + struct i40e_ring *rx_ring; + u16 pf_q; + + pf_q = ch->base_queue + i; + rx_ring = vsi->rx_rings[pf_q]; + rx_ring->netdev = NULL; + } + dev_info(&pf->pdev->dev, + "Error adding mac filter on macvlan err %s, aq_err %s\n", + i40e_stat_str(hw, ret), + i40e_aq_str(hw, aq_err)); + netdev_err(vdev, "L2fwd offload disabled to L2 filter error\n"); + } + + return ret; +} + +/** + * i40e_setup_macvlans - create the channels which will be macvlans + * @vsi: the VSI we want to access + * @macvlan_cnt: no. of macvlans to be setup + * @qcnt: no. of Qs per macvlan + * @vdev: macvlan netdevice + */ +static int i40e_setup_macvlans(struct i40e_vsi *vsi, u16 macvlan_cnt, u16 qcnt, + struct net_device *vdev) +{ + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + struct i40e_vsi_context ctxt; + u16 sections, qmap, num_qps; + struct i40e_channel *ch; + int i, pow, ret = 0; + u8 offset = 0; + + if (vsi->type != I40E_VSI_MAIN || !macvlan_cnt) + return -EINVAL; + + num_qps = vsi->num_queue_pairs - (macvlan_cnt * qcnt); + + /* find the next higher power-of-2 of num queue pairs */ + pow = fls(roundup_pow_of_two(num_qps) - 1); + + qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) | + (pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT); + + /* Setup context bits for the main VSI */ + sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID; + sections |= I40E_AQ_VSI_PROP_SCHED_VALID; + memset(&ctxt, 0, sizeof(ctxt)); + ctxt.seid = vsi->seid; + ctxt.pf_num = vsi->back->hw.pf_id; + ctxt.vf_num = 0; + ctxt.uplink_seid = vsi->uplink_seid; + ctxt.info = vsi->info; + ctxt.info.tc_mapping[0] = cpu_to_le16(qmap); + ctxt.info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG); + ctxt.info.queue_mapping[0] = cpu_to_le16(vsi->base_queue); + ctxt.info.valid_sections |= cpu_to_le16(sections); + + /* Reconfigure RSS for main VSI with new max queue count */ + vsi->rss_size = max_t(u16, num_qps, qcnt); + ret = i40e_vsi_config_rss(vsi); + if (ret) { + dev_info(&pf->pdev->dev, + "Failed to reconfig RSS for num_queues (%u)\n", + vsi->rss_size); + return ret; + } + vsi->reconfig_rss = true; + dev_dbg(&vsi->back->pdev->dev, + "Reconfigured RSS with num_queues (%u)\n", vsi->rss_size); + vsi->next_base_queue = num_qps; + vsi->cnt_q_avail = vsi->num_queue_pairs - num_qps; + + /* Update the VSI after updating the VSI queue-mapping + * information + */ + ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); + if (ret) { + dev_info(&pf->pdev->dev, + "Update vsi tc config failed, err %s aq_err %s\n", + i40e_stat_str(hw, ret), + i40e_aq_str(hw, hw->aq.asq_last_status)); + return ret; + } + /* update the local VSI info with updated queue map */ + i40e_vsi_update_queue_map(vsi, &ctxt); + vsi->info.valid_sections = 0; + + /* Create channels for macvlans */ + INIT_LIST_HEAD(&vsi->macvlan_list); + for (i = 0; i < macvlan_cnt; i++) { + ch = kzalloc(sizeof(*ch), GFP_KERNEL); + if (!ch) { + ret = -ENOMEM; + goto err_free; + } + INIT_LIST_HEAD(&ch->list); + ch->num_queue_pairs = qcnt; + if (!i40e_setup_channel(pf, vsi, ch)) { + ret = -EINVAL; + kfree(ch); + goto err_free; + } + ch->parent_vsi = vsi; + vsi->cnt_q_avail -= ch->num_queue_pairs; + vsi->macvlan_cnt++; + list_add_tail(&ch->list, &vsi->macvlan_list); + } + + return ret; + +err_free: + dev_info(&pf->pdev->dev, "Failed to setup macvlans\n"); + i40e_free_macvlan_channels(vsi); + + return ret; +} + +/** + * i40e_fwd_add - configure macvlans + * @netdev: net device to configure + * @vdev: macvlan netdevice + **/ +static void *i40e_fwd_add(struct net_device *netdev, struct net_device *vdev) +{ + struct i40e_netdev_priv *np = netdev_priv(netdev); + u16 q_per_macvlan = 0, macvlan_cnt = 0, vectors; + struct i40e_vsi *vsi = np->vsi; + struct i40e_pf *pf = vsi->back; + struct i40e_fwd_adapter *fwd; + int avail_macvlan, ret; + + if ((pf->flags & I40E_FLAG_DCB_ENABLED)) { + netdev_info(netdev, "Macvlans are not supported when DCB is enabled\n"); + return ERR_PTR(-EINVAL); + } + if ((pf->flags & I40E_FLAG_TC_MQPRIO)) { + netdev_info(netdev, "Macvlans are not supported when HW TC offload is on\n"); + return ERR_PTR(-EINVAL); + } + if (pf->num_lan_msix < I40E_MIN_MACVLAN_VECTORS) { + netdev_info(netdev, "Not enough vectors available to support macvlans\n"); + return ERR_PTR(-EINVAL); + } + + /* The macvlan device has to be a single Q device so that the + * tc_to_txq field can be reused to pick the tx queue. + */ + if (netif_is_multiqueue(vdev)) + return ERR_PTR(-ERANGE); + + if (!vsi->macvlan_cnt) { + /* reserve bit 0 for the pf device */ + set_bit(0, vsi->fwd_bitmask); + + /* Try to reserve as many queues as possible for macvlans. First + * reserve 3/4th of max vectors, then half, then quarter and + * calculate Qs per macvlan as you go + */ + vectors = pf->num_lan_msix; + if (vectors <= I40E_MAX_MACVLANS && vectors > 64) { + /* allocate 4 Qs per macvlan and 32 Qs to the PF*/ + q_per_macvlan = 4; + macvlan_cnt = (vectors - 32) / 4; + } else if (vectors <= 64 && vectors > 32) { + /* allocate 2 Qs per macvlan and 16 Qs to the PF*/ + q_per_macvlan = 2; + macvlan_cnt = (vectors - 16) / 2; + } else if (vectors <= 32 && vectors > 16) { + /* allocate 1 Q per macvlan and 16 Qs to the PF*/ + q_per_macvlan = 1; + macvlan_cnt = vectors - 16; + } else if (vectors <= 16 && vectors > 8) { + /* allocate 1 Q per macvlan and 8 Qs to the PF */ + q_per_macvlan = 1; + macvlan_cnt = vectors - 8; + } else { + /* allocate 1 Q per macvlan and 1 Q to the PF */ + q_per_macvlan = 1; + macvlan_cnt = vectors - 1; + } + + if (macvlan_cnt == 0) + return ERR_PTR(-EBUSY); + + /* Quiesce VSI queues */ + i40e_quiesce_vsi(vsi); + + /* sets up the macvlans but does not "enable" them */ + ret = i40e_setup_macvlans(vsi, macvlan_cnt, q_per_macvlan, + vdev); + if (ret) + return ERR_PTR(ret); + + /* Unquiesce VSI */ + i40e_unquiesce_vsi(vsi); + } + avail_macvlan = find_first_zero_bit(vsi->fwd_bitmask, + vsi->macvlan_cnt); + if (avail_macvlan >= I40E_MAX_MACVLANS) + return ERR_PTR(-EBUSY); + + /* create the fwd struct */ + fwd = kzalloc(sizeof(*fwd), GFP_KERNEL); + if (!fwd) + return ERR_PTR(-ENOMEM); + + set_bit(avail_macvlan, vsi->fwd_bitmask); + fwd->bit_no = avail_macvlan; + netdev_set_sb_channel(vdev, avail_macvlan); + fwd->netdev = vdev; + + if (!netif_running(netdev)) + return fwd; + + /* Set fwd ring up */ + ret = i40e_fwd_ring_up(vsi, vdev, fwd); + if (ret) { + /* unbind the queues and drop the subordinate channel config */ + netdev_unbind_sb_channel(netdev, vdev); + netdev_set_sb_channel(vdev, 0); + + kfree(fwd); + return ERR_PTR(-EINVAL); + } + + return fwd; +} + +/** + * i40e_del_all_macvlans - Delete all the mac filters on the channels + * @vsi: the VSI we want to access + */ +static void i40e_del_all_macvlans(struct i40e_vsi *vsi) +{ + struct i40e_channel *ch, *ch_tmp; + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + int aq_err, ret = 0; + + if (list_empty(&vsi->macvlan_list)) + return; + + list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) { + if (i40e_is_channel_macvlan(ch)) { + ret = i40e_del_macvlan_filter(hw, ch->seid, + i40e_channel_mac(ch), + &aq_err); + if (!ret) { + /* Reset queue contexts */ + i40e_reset_ch_rings(vsi, ch); + clear_bit(ch->fwd->bit_no, vsi->fwd_bitmask); + netdev_unbind_sb_channel(vsi->netdev, + ch->fwd->netdev); + netdev_set_sb_channel(ch->fwd->netdev, 0); + kfree(ch->fwd); + ch->fwd = NULL; + } + } + } +} + +/** + * i40e_fwd_del - delete macvlan interfaces + * @netdev: net device to configure + * @vdev: macvlan netdevice + */ +static void i40e_fwd_del(struct net_device *netdev, void *vdev) +{ + struct i40e_netdev_priv *np = netdev_priv(netdev); + struct i40e_fwd_adapter *fwd = vdev; + struct i40e_channel *ch, *ch_tmp; + struct i40e_vsi *vsi = np->vsi; + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + int aq_err, ret = 0; + + /* Find the channel associated with the macvlan and del mac filter */ + list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) { + if (i40e_is_channel_macvlan(ch) && + ether_addr_equal(i40e_channel_mac(ch), + fwd->netdev->dev_addr)) { + ret = i40e_del_macvlan_filter(hw, ch->seid, + i40e_channel_mac(ch), + &aq_err); + if (!ret) { + /* Reset queue contexts */ + i40e_reset_ch_rings(vsi, ch); + clear_bit(ch->fwd->bit_no, vsi->fwd_bitmask); + netdev_unbind_sb_channel(netdev, fwd->netdev); + netdev_set_sb_channel(fwd->netdev, 0); + kfree(ch->fwd); + ch->fwd = NULL; + } else { + dev_info(&pf->pdev->dev, + "Error deleting mac filter on macvlan err %s, aq_err %s\n", + i40e_stat_str(hw, ret), + i40e_aq_str(hw, aq_err)); + } + break; + } } } @@ -6939,13 +7716,25 @@ vsi->seid); need_reset = true; goto exit; + } else if (enabled_tc && + (!is_power_of_2(vsi->tc_config.tc_info[0].qcount))) { + netdev_info(netdev, + "Failed to create channel. Override queues (%u) not power of 2\n", + vsi->tc_config.tc_info[0].qcount); + ret = -EINVAL; + need_reset = true; + goto exit; } + + dev_info(&vsi->back->pdev->dev, + "Setup channel (id:%u) utilizing num_queues %d\n", + vsi->seid, vsi->tc_config.tc_info[0].qcount); if (pf->flags & I40E_FLAG_TC_MQPRIO) { if (vsi->mqprio_qopt.max_rate[0]) { - u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0]; + u64 max_tx_rate = i40e_bw_bytes_to_mbits(vsi, + vsi->mqprio_qopt.max_rate[0]); - do_div(max_tx_rate, I40E_BW_MBPS_DIVISOR); ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate); if (!ret) { u64 credits = max_tx_rate; @@ -6994,8 +7783,8 @@ i40e_set_cld_element(struct i40e_cloud_filter *filter, struct i40e_aqc_cloud_filters_element_data *cld) { - int i, j; u32 ipa; + int i; memset(cld, 0, sizeof(*cld)); ether_addr_copy(cld->outer_mac, filter->dst_mac); @@ -7006,14 +7795,14 @@ if (filter->n_proto == ETH_P_IPV6) { #define IPV6_MAX_INDEX (ARRAY_SIZE(filter->dst_ipv6) - 1) - for (i = 0, j = 0; i < ARRAY_SIZE(filter->dst_ipv6); - i++, j += 2) { + for (i = 0; i < ARRAY_SIZE(filter->dst_ipv6); i++) { ipa = be32_to_cpu(filter->dst_ipv6[IPV6_MAX_INDEX - i]); - ipa = cpu_to_le32(ipa); - memcpy(&cld->ipaddr.raw_v6.data[j], &ipa, sizeof(ipa)); + + *(__le32 *)&cld->ipaddr.raw_v6.data[i * 2] = cpu_to_le32(ipa); } } else { ipa = be32_to_cpu(filter->dst_ipv4); + memcpy(&cld->ipaddr.v4.data, &ipa, sizeof(ipa)); } @@ -7208,19 +7997,21 @@ /** * i40e_parse_cls_flower - Parse tc flower filters provided by kernel * @vsi: Pointer to VSI - * @cls_flower: Pointer to struct tc_cls_flower_offload + * @f: Pointer to struct flow_cls_offload * @filter: Pointer to cloud filter structure * **/ static int i40e_parse_cls_flower(struct i40e_vsi *vsi, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, struct i40e_cloud_filter *filter) { + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct flow_dissector *dissector = rule->match.dissector; u16 n_proto_mask = 0, n_proto_key = 0, addr_type = 0; struct i40e_pf *pf = vsi->back; u8 field_flags = 0; - if (f->dissector->used_keys & + if (dissector->used_keys & ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | BIT(FLOW_DISSECTOR_KEY_BASIC) | BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | @@ -7230,143 +8021,109 @@ BIT(FLOW_DISSECTOR_KEY_PORTS) | BIT(FLOW_DISSECTOR_KEY_ENC_KEYID))) { dev_err(&pf->pdev->dev, "Unsupported key used: 0x%x\n", - f->dissector->used_keys); + dissector->used_keys); return -EOPNOTSUPP; } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { - struct flow_dissector_key_keyid *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_KEYID, - f->key); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_match_enc_keyid match; - struct flow_dissector_key_keyid *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_KEYID, - f->mask); - - if (mask->keyid != 0) + flow_rule_match_enc_keyid(rule, &match); + if (match.mask->keyid != 0) field_flags |= I40E_CLOUD_FIELD_TEN_ID; - filter->tenant_id = be32_to_cpu(key->keyid); + filter->tenant_id = be32_to_cpu(match.key->keyid); } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) { - struct flow_dissector_key_basic *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, - f->key); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; - struct flow_dissector_key_basic *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, - f->mask); - - n_proto_key = ntohs(key->n_proto); - n_proto_mask = ntohs(mask->n_proto); + flow_rule_match_basic(rule, &match); + n_proto_key = ntohs(match.key->n_proto); + n_proto_mask = ntohs(match.mask->n_proto); if (n_proto_key == ETH_P_ALL) { n_proto_key = 0; n_proto_mask = 0; } filter->n_proto = n_proto_key & n_proto_mask; - filter->ip_proto = key->ip_proto; + filter->ip_proto = match.key->ip_proto; } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { - struct flow_dissector_key_eth_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ETH_ADDRS, - f->key); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_match_eth_addrs match; - struct flow_dissector_key_eth_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ETH_ADDRS, - f->mask); + flow_rule_match_eth_addrs(rule, &match); /* use is_broadcast and is_zero to check for all 0xf or 0 */ - if (!is_zero_ether_addr(mask->dst)) { - if (is_broadcast_ether_addr(mask->dst)) { + if (!is_zero_ether_addr(match.mask->dst)) { + if (is_broadcast_ether_addr(match.mask->dst)) { field_flags |= I40E_CLOUD_FIELD_OMAC; } else { dev_err(&pf->pdev->dev, "Bad ether dest mask %pM\n", - mask->dst); + match.mask->dst); return I40E_ERR_CONFIG; } } - if (!is_zero_ether_addr(mask->src)) { - if (is_broadcast_ether_addr(mask->src)) { + if (!is_zero_ether_addr(match.mask->src)) { + if (is_broadcast_ether_addr(match.mask->src)) { field_flags |= I40E_CLOUD_FIELD_IMAC; } else { dev_err(&pf->pdev->dev, "Bad ether src mask %pM\n", - mask->src); + match.mask->src); return I40E_ERR_CONFIG; } } - ether_addr_copy(filter->dst_mac, key->dst); - ether_addr_copy(filter->src_mac, key->src); + ether_addr_copy(filter->dst_mac, match.key->dst); + ether_addr_copy(filter->src_mac, match.key->src); } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) { - struct flow_dissector_key_vlan *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_VLAN, - f->key); - struct flow_dissector_key_vlan *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_VLAN, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_match_vlan match; - if (mask->vlan_id) { - if (mask->vlan_id == VLAN_VID_MASK) { + flow_rule_match_vlan(rule, &match); + if (match.mask->vlan_id) { + if (match.mask->vlan_id == VLAN_VID_MASK) { field_flags |= I40E_CLOUD_FIELD_IVLAN; } else { dev_err(&pf->pdev->dev, "Bad vlan mask 0x%04x\n", - mask->vlan_id); + match.mask->vlan_id); return I40E_ERR_CONFIG; } } - filter->vlan_id = cpu_to_be16(key->vlan_id); + filter->vlan_id = cpu_to_be16(match.key->vlan_id); } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) { - struct flow_dissector_key_control *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_CONTROL, - f->key); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control match; - addr_type = key->addr_type; + flow_rule_match_control(rule, &match); + addr_type = match.key->addr_type; } if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { - struct flow_dissector_key_ipv4_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV4_ADDRS, - f->key); - struct flow_dissector_key_ipv4_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV4_ADDRS, - f->mask); + struct flow_match_ipv4_addrs match; - if (mask->dst) { - if (mask->dst == cpu_to_be32(0xffffffff)) { + flow_rule_match_ipv4_addrs(rule, &match); + if (match.mask->dst) { + if (match.mask->dst == cpu_to_be32(0xffffffff)) { field_flags |= I40E_CLOUD_FIELD_IIP; } else { dev_err(&pf->pdev->dev, "Bad ip dst mask %pI4b\n", - &mask->dst); + &match.mask->dst); return I40E_ERR_CONFIG; } } - if (mask->src) { - if (mask->src == cpu_to_be32(0xffffffff)) { + if (match.mask->src) { + if (match.mask->src == cpu_to_be32(0xffffffff)) { field_flags |= I40E_CLOUD_FIELD_IIP; } else { dev_err(&pf->pdev->dev, "Bad ip src mask %pI4b\n", - &mask->src); + &match.mask->src); return I40E_ERR_CONFIG; } } @@ -7375,70 +8132,60 @@ dev_err(&pf->pdev->dev, "Tenant id not allowed for ip filter\n"); return I40E_ERR_CONFIG; } - filter->dst_ipv4 = key->dst; - filter->src_ipv4 = key->src; + filter->dst_ipv4 = match.key->dst; + filter->src_ipv4 = match.key->src; } if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { - struct flow_dissector_key_ipv6_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV6_ADDRS, - f->key); - struct flow_dissector_key_ipv6_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV6_ADDRS, - f->mask); + struct flow_match_ipv6_addrs match; + + flow_rule_match_ipv6_addrs(rule, &match); /* src and dest IPV6 address should not be LOOPBACK * (0:0:0:0:0:0:0:1), which can be represented as ::1 */ - if (ipv6_addr_loopback(&key->dst) || - ipv6_addr_loopback(&key->src)) { + if (ipv6_addr_loopback(&match.key->dst) || + ipv6_addr_loopback(&match.key->src)) { dev_err(&pf->pdev->dev, "Bad ipv6, addr is LOOPBACK\n"); return I40E_ERR_CONFIG; } - if (!ipv6_addr_any(&mask->dst) || !ipv6_addr_any(&mask->src)) + if (!ipv6_addr_any(&match.mask->dst) || + !ipv6_addr_any(&match.mask->src)) field_flags |= I40E_CLOUD_FIELD_IIP; - memcpy(&filter->src_ipv6, &key->src.s6_addr32, + memcpy(&filter->src_ipv6, &match.key->src.s6_addr32, sizeof(filter->src_ipv6)); - memcpy(&filter->dst_ipv6, &key->dst.s6_addr32, + memcpy(&filter->dst_ipv6, &match.key->dst.s6_addr32, sizeof(filter->dst_ipv6)); } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) { - struct flow_dissector_key_ports *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_PORTS, - f->key); - struct flow_dissector_key_ports *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_PORTS, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_match_ports match; - if (mask->src) { - if (mask->src == cpu_to_be16(0xffff)) { + flow_rule_match_ports(rule, &match); + if (match.mask->src) { + if (match.mask->src == cpu_to_be16(0xffff)) { field_flags |= I40E_CLOUD_FIELD_IIP; } else { dev_err(&pf->pdev->dev, "Bad src port mask 0x%04x\n", - be16_to_cpu(mask->src)); + be16_to_cpu(match.mask->src)); return I40E_ERR_CONFIG; } } - if (mask->dst) { - if (mask->dst == cpu_to_be16(0xffff)) { + if (match.mask->dst) { + if (match.mask->dst == cpu_to_be16(0xffff)) { field_flags |= I40E_CLOUD_FIELD_IIP; } else { dev_err(&pf->pdev->dev, "Bad dst port mask 0x%04x\n", - be16_to_cpu(mask->dst)); + be16_to_cpu(match.mask->dst)); return I40E_ERR_CONFIG; } } - filter->dst_port = key->dst; - filter->src_port = key->src; + filter->dst_port = match.key->dst; + filter->src_port = match.key->src; switch (filter->ip_proto) { case IPPROTO_TCP: @@ -7492,11 +8239,11 @@ /** * i40e_configure_clsflower - Configure tc flower filters * @vsi: Pointer to VSI - * @cls_flower: Pointer to struct tc_cls_flower_offload + * @cls_flower: Pointer to struct flow_cls_offload * **/ static int i40e_configure_clsflower(struct i40e_vsi *vsi, - struct tc_cls_flower_offload *cls_flower) + struct flow_cls_offload *cls_flower) { int tc = tc_classid_to_hwtc(vsi->netdev, cls_flower->classid); struct i40e_cloud_filter *filter = NULL; @@ -7506,6 +8253,11 @@ if (tc < 0) { dev_err(&vsi->back->pdev->dev, "Invalid traffic class\n"); return -EOPNOTSUPP; + } + + if (!tc) { + dev_err(&pf->pdev->dev, "Unable to add filter because of invalid destination"); + return -EINVAL; } if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) || @@ -7587,11 +8339,11 @@ /** * i40e_delete_clsflower - Remove tc flower filters * @vsi: Pointer to VSI - * @cls_flower: Pointer to struct tc_cls_flower_offload + * @cls_flower: Pointer to struct flow_cls_offload * **/ static int i40e_delete_clsflower(struct i40e_vsi *vsi, - struct tc_cls_flower_offload *cls_flower) + struct flow_cls_offload *cls_flower) { struct i40e_cloud_filter *filter = NULL; struct i40e_pf *pf = vsi->back; @@ -7630,20 +8382,20 @@ /** * i40e_setup_tc_cls_flower - flower classifier offloads - * @netdev: net device to configure - * @type_data: offload data + * @np: net device to configure + * @cls_flower: offload data **/ static int i40e_setup_tc_cls_flower(struct i40e_netdev_priv *np, - struct tc_cls_flower_offload *cls_flower) + struct flow_cls_offload *cls_flower) { struct i40e_vsi *vsi = np->vsi; switch (cls_flower->command) { - case TC_CLSFLOWER_REPLACE: + case FLOW_CLS_REPLACE: return i40e_configure_clsflower(vsi, cls_flower); - case TC_CLSFLOWER_DESTROY: + case FLOW_CLS_DESTROY: return i40e_delete_clsflower(vsi, cls_flower); - case TC_CLSFLOWER_STATS: + case FLOW_CLS_STATS: return -EOPNOTSUPP; default: return -EOPNOTSUPP; @@ -7667,34 +8419,21 @@ } } -static int i40e_setup_tc_block(struct net_device *dev, - struct tc_block_offload *f) -{ - struct i40e_netdev_priv *np = netdev_priv(dev); - - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) - return -EOPNOTSUPP; - - switch (f->command) { - case TC_BLOCK_BIND: - return tcf_block_cb_register(f->block, i40e_setup_tc_block_cb, - np, np, f->extack); - case TC_BLOCK_UNBIND: - tcf_block_cb_unregister(f->block, i40e_setup_tc_block_cb, np); - return 0; - default: - return -EOPNOTSUPP; - } -} +static LIST_HEAD(i40e_block_cb_list); static int __i40e_setup_tc(struct net_device *netdev, enum tc_setup_type type, void *type_data) { + struct i40e_netdev_priv *np = netdev_priv(netdev); + switch (type) { case TC_SETUP_QDISC_MQPRIO: return i40e_setup_tc(netdev, type_data); case TC_SETUP_BLOCK: - return i40e_setup_tc_block(netdev, type_data); + return flow_block_cb_setup_simple(type_data, + &i40e_block_cb_list, + i40e_setup_tc_block_cb, + np, np, true); default: return -EOPNOTSUPP; } @@ -7962,9 +8701,6 @@ { u32 val; - WARN_ON(in_interrupt()); - - /* do the biggest reset indicated */ if (reset_flags & BIT_ULL(__I40E_GLOBAL_RESET_REQUESTED)) { @@ -8013,6 +8749,10 @@ */ i40e_prep_for_reset(pf, lock_acquired); i40e_reset_and_rebuild(pf, true, lock_acquired); + dev_info(&pf->pdev->dev, + pf->flags & I40E_FLAG_DISABLE_FW_LLDP ? + "FW LLDP is disabled\n" : + "FW LLDP is enabled\n"); } else if (reset_flags & BIT_ULL(__I40E_REINIT_REQUESTED)) { int v; @@ -8202,8 +8942,8 @@ i40e_service_event_schedule(pf); } else { i40e_pf_unquiesce_all_vsi(pf); - set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state); - set_bit(__I40E_CLIENT_L2_CHANGE, pf->state); + set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state); + set_bit(__I40E_CLIENT_L2_CHANGE, pf->state); } exit: @@ -8489,13 +9229,6 @@ return i40e_get_current_fd_count(pf) - pf->fdir_pf_active_filters; } -/* We can see up to 256 filter programming desc in transit if the filters are - * being applied really fast; before we see the first - * filter miss error on Rx queue 0. Accumulating enough error messages before - * reacting will make sure we don't cause flush too often. - */ -#define I40E_MAX_FD_PROGRAM_ERROR 256 - /** * i40e_fdir_reinit_subtask - Worker thread to reinit FDIR filter table * @pf: board private structure @@ -8585,14 +9318,9 @@ i40e_status status; bool new_link, old_link; - /* save off old link status information */ - pf->hw.phy.link_info_old = pf->hw.phy.link_info; - /* set this to force the get_link_status call to refresh state */ pf->hw.phy.get_link_info = true; - old_link = (pf->hw.phy.link_info_old.link_info & I40E_AQ_LINK_UP); - status = i40e_get_link_status(&pf->hw, &new_link); /* On success, disable temp link polling */ @@ -8622,7 +9350,7 @@ /* Notify the base of the switch tree connected to * the link. Floating VEBs are not notified. */ - if (pf->lan_veb != I40E_NO_VEB && pf->veb[pf->lan_veb]) + if (pf->lan_veb < I40E_MAX_VEB && pf->veb[pf->lan_veb]) i40e_veb_link_event(pf->veb[pf->lan_veb], new_link); else i40e_vsi_link_event(vsi, new_link); @@ -9100,6 +9828,7 @@ /** * i40e_get_capabilities - get info about the HW * @pf: the PF struct + * @list_type: AQ capability to be queried **/ static int i40e_get_capabilities(struct i40e_pf *pf, enum i40e_admin_queue_opc list_type) @@ -9337,6 +10066,21 @@ } /** + * i40e_clean_xps_state - clean xps state for every tx_ring + * @vsi: ptr to the VSI + **/ +static void i40e_clean_xps_state(struct i40e_vsi *vsi) +{ + int i; + + if (vsi->tx_rings) + for (i = 0; i < vsi->num_queue_pairs; i++) + if (vsi->tx_rings[i]) + clear_bit(__I40E_TX_XPS_INIT_DONE, + vsi->tx_rings[i]->state); +} + +/** * i40e_prep_for_reset - prep for the core to reset * @pf: board private structure * @lock_acquired: indicates whether or not the lock has been acquired @@ -9367,8 +10111,10 @@ rtnl_unlock(); for (v = 0; v < pf->num_alloc_vsi; v++) { - if (pf->vsi[v]) + if (pf->vsi[v]) { + i40e_clean_xps_state(pf->vsi[v]); pf->vsi[v]->seid = 0; + } } i40e_shutdown_adminq(&pf->hw); @@ -9380,6 +10126,11 @@ dev_warn(&pf->pdev->dev, "shutdown_lan_hmc failed: %d\n", ret); } + + /* Save the current PTP time so that we can restore the time after the + * reset completes. + */ + i40e_ptp_save_hw_time(pf); } /** @@ -9390,11 +10141,11 @@ { struct i40e_driver_version dv; - dv.major_version = DRV_VERSION_MAJOR; - dv.minor_version = DRV_VERSION_MINOR; - dv.build_version = DRV_VERSION_BUILD; + dv.major_version = 0xff; + dv.minor_version = 0xff; + dv.build_version = 0xff; dv.subbuild_version = 0; - strlcpy(dv.driver_string, DRV_VERSION, sizeof(dv.driver_string)); + strlcpy(dv.driver_string, UTS_RELEASE, sizeof(dv.driver_string)); i40e_aq_send_driver_version(&pf->hw, &dv, NULL); } @@ -9472,13 +10223,19 @@ **/ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) { + const bool is_recovery_mode_reported = i40e_check_recovery_mode(pf); struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; struct i40e_hw *hw = &pf->hw; i40e_status ret; u32 val; int v; - if (test_bit(__I40E_DOWN, pf->state)) + if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) && + is_recovery_mode_reported) + i40e_set_ethtool_ops(pf->vsi[pf->lan_vsi]->netdev); + + if (test_bit(__I40E_DOWN, pf->state) && + !test_bit(__I40E_RECOVERY_MODE, pf->state)) goto clear_recovery; dev_dbg(&pf->pdev->dev, "Rebuilding internal switch\n"); @@ -9500,6 +10257,43 @@ /* re-verify the eeprom if we just had an EMP reset */ if (test_and_clear_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state)) i40e_verify_eeprom(pf); + + /* if we are going out of or into recovery mode we have to act + * accordingly with regard to resources initialization + * and deinitialization + */ + if (test_bit(__I40E_RECOVERY_MODE, pf->state)) { + if (i40e_get_capabilities(pf, + i40e_aqc_opc_list_func_capabilities)) + goto end_unlock; + + if (is_recovery_mode_reported) { + /* we're staying in recovery mode so we'll reinitialize + * misc vector here + */ + if (i40e_setup_misc_vector_for_recovery_mode(pf)) + goto end_unlock; + } else { + if (!lock_acquired) + rtnl_lock(); + /* we're going out of recovery mode so we'll free + * the IRQ allocated specifically for recovery mode + * and restore the interrupt scheme + */ + free_irq(pf->pdev->irq, pf); + i40e_clear_interrupt_scheme(pf); + if (i40e_restore_interrupt_scheme(pf)) + goto end_unlock; + } + + /* tell the firmware that we're starting */ + i40e_send_version(pf); + + /* bail out in case recovery mode was detected, as there is + * no need for further configuration. + */ + goto end_unlock; + } i40e_clear_pxe_mode(hw); ret = i40e_get_capabilities(pf, i40e_aqc_opc_list_func_capabilities); @@ -9532,7 +10326,7 @@ /* do basic switch setup */ if (!lock_acquired) rtnl_lock(); - ret = i40e_setup_pf_switch(pf, reinit); + ret = i40e_setup_pf_switch(pf, reinit, true); if (ret) goto end_unlock; @@ -9602,10 +10396,10 @@ } if (vsi->mqprio_qopt.max_rate[0]) { - u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0]; + u64 max_tx_rate = i40e_bw_bytes_to_mbits(vsi, + vsi->mqprio_qopt.max_rate[0]); u64 credits = 0; - do_div(max_tx_rate, I40E_BW_MBPS_DIVISOR); ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate); if (ret) goto end_unlock; @@ -9745,7 +10539,6 @@ { struct i40e_hw *hw = &pf->hw; bool mdd_detected = false; - bool pf_mdd_detected = false; struct i40e_vf *vf; u32 reg; int i; @@ -9791,19 +10584,12 @@ reg = rd32(hw, I40E_PF_MDET_TX); if (reg & I40E_PF_MDET_TX_VALID_MASK) { wr32(hw, I40E_PF_MDET_TX, 0xFFFF); - dev_info(&pf->pdev->dev, "TX driver issue detected, PF reset issued\n"); - pf_mdd_detected = true; + dev_dbg(&pf->pdev->dev, "TX driver issue detected on PF\n"); } reg = rd32(hw, I40E_PF_MDET_RX); if (reg & I40E_PF_MDET_RX_VALID_MASK) { wr32(hw, I40E_PF_MDET_RX, 0xFFFF); - dev_info(&pf->pdev->dev, "RX driver issue detected, PF reset issued\n"); - pf_mdd_detected = true; - } - /* Queue belongs to the PF, initiate a reset */ - if (pf_mdd_detected) { - set_bit(__I40E_PF_RESET_REQUESTED, pf->state); - i40e_service_event_schedule(pf); + dev_dbg(&pf->pdev->dev, "RX driver issue detected on PF\n"); } } @@ -9816,6 +10602,9 @@ vf->num_mdd_events++; dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n", i); + dev_info(&pf->pdev->dev, + "Use PF Control I/F to re-enable the VF\n"); + set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states); } reg = rd32(hw, I40E_VP_MDET_RX(i)); @@ -9824,11 +10613,6 @@ vf->num_mdd_events++; dev_info(&pf->pdev->dev, "RX driver issue detected on VF %d\n", i); - } - - if (vf->num_mdd_events > I40E_DEFAULT_NUM_MDD_EVENTS_ALLOWED) { - dev_info(&pf->pdev->dev, - "Too many MDD events on VF %d, disabled\n", i); dev_info(&pf->pdev->dev, "Use PF Control I/F to re-enable the VF\n"); set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states); @@ -9843,106 +10627,6 @@ i40e_flush(hw); } -static const char *i40e_tunnel_name(u8 type) -{ - switch (type) { - case UDP_TUNNEL_TYPE_VXLAN: - return "vxlan"; - case UDP_TUNNEL_TYPE_GENEVE: - return "geneve"; - default: - return "unknown"; - } -} - -/** - * i40e_sync_udp_filters - Trigger a sync event for existing UDP filters - * @pf: board private structure - **/ -static void i40e_sync_udp_filters(struct i40e_pf *pf) -{ - int i; - - /* loop through and set pending bit for all active UDP filters */ - for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) { - if (pf->udp_ports[i].port) - pf->pending_udp_bitmap |= BIT_ULL(i); - } - - set_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state); -} - -/** - * i40e_sync_udp_filters_subtask - Sync the VSI filter list with HW - * @pf: board private structure - **/ -static void i40e_sync_udp_filters_subtask(struct i40e_pf *pf) -{ - struct i40e_hw *hw = &pf->hw; - u8 filter_index, type; - u16 port; - int i; - - if (!test_and_clear_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state)) - return; - - /* acquire RTNL to maintain state of flags and port requests */ - rtnl_lock(); - - for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) { - if (pf->pending_udp_bitmap & BIT_ULL(i)) { - struct i40e_udp_port_config *udp_port; - i40e_status ret = 0; - - udp_port = &pf->udp_ports[i]; - pf->pending_udp_bitmap &= ~BIT_ULL(i); - - port = READ_ONCE(udp_port->port); - type = READ_ONCE(udp_port->type); - filter_index = READ_ONCE(udp_port->filter_index); - - /* release RTNL while we wait on AQ command */ - rtnl_unlock(); - - if (port) - ret = i40e_aq_add_udp_tunnel(hw, port, - type, - &filter_index, - NULL); - else if (filter_index != I40E_UDP_PORT_INDEX_UNUSED) - ret = i40e_aq_del_udp_tunnel(hw, filter_index, - NULL); - - /* reacquire RTNL so we can update filter_index */ - rtnl_lock(); - - if (ret) { - dev_info(&pf->pdev->dev, - "%s %s port %d, index %d failed, err %s aq_err %s\n", - i40e_tunnel_name(type), - port ? "add" : "delete", - port, - filter_index, - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, - pf->hw.aq.asq_last_status)); - if (port) { - /* failed to add, just reset port, - * drop pending bit for any deletion - */ - udp_port->port = 0; - pf->pending_udp_bitmap &= ~BIT_ULL(i); - } - } else if (port) { - /* record filter index on success */ - udp_port->filter_index = filter_index; - } - } - } - - rtnl_unlock(); -} - /** * i40e_service_task - Run the driver's async subtasks * @work: pointer to work_struct containing our data @@ -9955,31 +10639,37 @@ unsigned long start_time = jiffies; /* don't bother with service tasks if a reset is in progress */ - if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) + if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) || + test_bit(__I40E_SUSPENDED, pf->state)) return; if (test_and_set_bit(__I40E_SERVICE_SCHED, pf->state)) return; - i40e_detect_recover_hung(pf->vsi[pf->lan_vsi]); - i40e_sync_filters_subtask(pf); - i40e_reset_subtask(pf); - i40e_handle_mdd_event(pf); - i40e_vc_process_vflr_event(pf); - i40e_watchdog_subtask(pf); - i40e_fdir_reinit_subtask(pf); - if (test_and_clear_bit(__I40E_CLIENT_RESET, pf->state)) { - /* Client subtask will reopen next time through. */ - i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], true); + if (!test_bit(__I40E_RECOVERY_MODE, pf->state)) { + i40e_detect_recover_hung(pf->vsi[pf->lan_vsi]); + i40e_sync_filters_subtask(pf); + i40e_reset_subtask(pf); + i40e_handle_mdd_event(pf); + i40e_vc_process_vflr_event(pf); + i40e_watchdog_subtask(pf); + i40e_fdir_reinit_subtask(pf); + if (test_and_clear_bit(__I40E_CLIENT_RESET, pf->state)) { + /* Client subtask will reopen next time through. */ + i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], + true); + } else { + i40e_client_subtask(pf); + if (test_and_clear_bit(__I40E_CLIENT_L2_CHANGE, + pf->state)) + i40e_notify_client_of_l2_param_changes( + pf->vsi[pf->lan_vsi]); + } + i40e_sync_filters_subtask(pf); } else { - i40e_client_subtask(pf); - if (test_and_clear_bit(__I40E_CLIENT_L2_CHANGE, - pf->state)) - i40e_notify_client_of_l2_param_changes( - pf->vsi[pf->lan_vsi]); + i40e_reset_subtask(pf); } - i40e_sync_filters_subtask(pf); - i40e_sync_udp_filters_subtask(pf); + i40e_clean_adminq_subtask(pf); /* flush memory to make sure state is correct before next watchdog */ @@ -9999,7 +10689,7 @@ /** * i40e_service_timer - timer callback - * @data: pointer to PF struct + * @t: timer list pointer **/ static void i40e_service_timer(struct timer_list *t) { @@ -10021,8 +10711,12 @@ switch (vsi->type) { case I40E_VSI_MAIN: vsi->alloc_queue_pairs = pf->num_lan_qps; - vsi->num_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, - I40E_REQ_DESCRIPTOR_MULTIPLE); + if (!vsi->num_tx_desc) + vsi->num_tx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, + I40E_REQ_DESCRIPTOR_MULTIPLE); + if (!vsi->num_rx_desc) + vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, + I40E_REQ_DESCRIPTOR_MULTIPLE); if (pf->flags & I40E_FLAG_MSIX_ENABLED) vsi->num_q_vectors = pf->num_lan_msix; else @@ -10032,22 +10726,32 @@ case I40E_VSI_FDIR: vsi->alloc_queue_pairs = 1; - vsi->num_desc = ALIGN(I40E_FDIR_RING_COUNT, - I40E_REQ_DESCRIPTOR_MULTIPLE); + vsi->num_tx_desc = ALIGN(I40E_FDIR_RING_COUNT, + I40E_REQ_DESCRIPTOR_MULTIPLE); + vsi->num_rx_desc = ALIGN(I40E_FDIR_RING_COUNT, + I40E_REQ_DESCRIPTOR_MULTIPLE); vsi->num_q_vectors = pf->num_fdsb_msix; break; case I40E_VSI_VMDQ2: vsi->alloc_queue_pairs = pf->num_vmdq_qps; - vsi->num_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, - I40E_REQ_DESCRIPTOR_MULTIPLE); + if (!vsi->num_tx_desc) + vsi->num_tx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, + I40E_REQ_DESCRIPTOR_MULTIPLE); + if (!vsi->num_rx_desc) + vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, + I40E_REQ_DESCRIPTOR_MULTIPLE); vsi->num_q_vectors = pf->num_vmdq_msix; break; case I40E_VSI_SRIOV: vsi->alloc_queue_pairs = pf->num_vf_qps; - vsi->num_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, - I40E_REQ_DESCRIPTOR_MULTIPLE); + if (!vsi->num_tx_desc) + vsi->num_tx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, + I40E_REQ_DESCRIPTOR_MULTIPLE); + if (!vsi->num_rx_desc) + vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, + I40E_REQ_DESCRIPTOR_MULTIPLE); break; default: @@ -10160,6 +10864,12 @@ hash_init(vsi->mac_filter_hash); vsi->irqs_ready = false; + if (type == I40E_VSI_MAIN) { + vsi->af_xdp_zc_qps = bitmap_zalloc(pf->num_lan_qps, GFP_KERNEL); + if (!vsi->af_xdp_zc_qps) + goto err_rings; + } + ret = i40e_set_num_rings_in_vsi(vsi); if (ret) goto err_rings; @@ -10178,6 +10888,7 @@ goto unlock_pf; err_rings: + bitmap_free(vsi->af_xdp_zc_qps); pf->next_vsi = i - 1; kfree(vsi); unlock_pf: @@ -10258,6 +10969,7 @@ i40e_put_lump(pf->qp_pile, vsi->base_queue, vsi->idx); i40e_put_lump(pf->irq_pile, vsi->base_vector, vsi->idx); + bitmap_free(vsi->af_xdp_zc_qps); i40e_vsi_free_arrays(vsi, true); i40e_clear_rss_config_user(vsi); @@ -10315,7 +11027,7 @@ ring->vsi = vsi; ring->netdev = vsi->netdev; ring->dev = &pf->pdev->dev; - ring->count = vsi->num_desc; + ring->count = vsi->num_tx_desc; ring->size = 0; ring->dcb_tc = 0; if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE) @@ -10332,7 +11044,7 @@ ring->vsi = vsi; ring->netdev = NULL; ring->dev = &pf->pdev->dev; - ring->count = vsi->num_desc; + ring->count = vsi->num_tx_desc; ring->size = 0; ring->dcb_tc = 0; if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE) @@ -10348,7 +11060,7 @@ ring->vsi = vsi; ring->netdev = vsi->netdev; ring->dev = &pf->pdev->dev; - ring->count = vsi->num_desc; + ring->count = vsi->num_rx_desc; ring->size = 0; ring->dcb_tc = 0; ring->itr_setting = pf->rx_itr_default; @@ -10616,11 +11328,10 @@ * i40e_vsi_alloc_q_vector - Allocate memory for a single interrupt vector * @vsi: the VSI being configured * @v_idx: index of the vector in the vsi struct - * @cpu: cpu to be used on affinity_mask * * We allocate one q_vector. If allocation fails we return -ENOMEM. **/ -static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx, int cpu) +static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx) { struct i40e_q_vector *q_vector; @@ -10653,7 +11364,7 @@ static int i40e_vsi_alloc_q_vectors(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; - int err, v_idx, num_q_vectors, current_cpu; + int err, v_idx, num_q_vectors; /* if not MSIX, give the one vector only to the LAN VSI */ if (pf->flags & I40E_FLAG_MSIX_ENABLED) @@ -10663,15 +11374,10 @@ else return -EINVAL; - current_cpu = cpumask_first(cpu_online_mask); - for (v_idx = 0; v_idx < num_q_vectors; v_idx++) { - err = i40e_vsi_alloc_q_vector(vsi, v_idx, current_cpu); + err = i40e_vsi_alloc_q_vector(vsi, v_idx); if (err) goto err_out; - current_cpu = cpumask_next(current_cpu, cpu_online_mask); - if (unlikely(current_cpu >= nr_cpu_ids)) - current_cpu = cpumask_first(cpu_online_mask); } return 0; @@ -10793,6 +11499,48 @@ } /** + * i40e_setup_misc_vector_for_recovery_mode - Setup the misc vector to handle + * non queue events in recovery mode + * @pf: board private structure + * + * This sets up the handler for MSIX 0 or MSI/legacy, which is used to manage + * the non-queue interrupts, e.g. AdminQ and errors in recovery mode. + * This is handled differently than in recovery mode since no Tx/Rx resources + * are being allocated. + **/ +static int i40e_setup_misc_vector_for_recovery_mode(struct i40e_pf *pf) +{ + int err; + + if (pf->flags & I40E_FLAG_MSIX_ENABLED) { + err = i40e_setup_misc_vector(pf); + + if (err) { + dev_info(&pf->pdev->dev, + "MSI-X misc vector request failed, error %d\n", + err); + return err; + } + } else { + u32 flags = pf->flags & I40E_FLAG_MSI_ENABLED ? 0 : IRQF_SHARED; + + err = request_irq(pf->pdev->irq, i40e_intr, flags, + pf->int_name, pf); + + if (err) { + dev_info(&pf->pdev->dev, + "MSI/legacy misc vector request failed, error %d\n", + err); + return err; + } + i40e_enable_misc_int_causes(pf); + i40e_irq_dynamic_enable_icr0(pf); + } + + return 0; +} + +/** * i40e_setup_misc_vector - Setup the misc vector to handle non queue events * @pf: board private structure * @@ -10861,7 +11609,7 @@ } if (lut) { - bool pf_lut = vsi->type == I40E_VSI_MAIN ? true : false; + bool pf_lut = vsi->type == I40E_VSI_MAIN; ret = i40e_aq_get_rss_lut(hw, vsi->id, pf_lut, lut, lut_size); if (ret) { @@ -11103,6 +11851,7 @@ if (!(pf->flags & I40E_FLAG_RSS_ENABLED)) return 0; + queue_count = min_t(int, queue_count, num_online_cpus()); new_rss_size = min_t(int, queue_count, pf->rss_size_max); if (queue_count != vsi->num_queue_pairs) { @@ -11264,6 +12013,58 @@ } /** + * i40e_is_total_port_shutdown_enabled - read NVM and return value + * if total port shutdown feature is enabled for this PF + * @pf: board private structure + **/ +static bool i40e_is_total_port_shutdown_enabled(struct i40e_pf *pf) +{ +#define I40E_TOTAL_PORT_SHUTDOWN_ENABLED BIT(4) +#define I40E_FEATURES_ENABLE_PTR 0x2A +#define I40E_CURRENT_SETTING_PTR 0x2B +#define I40E_LINK_BEHAVIOR_WORD_OFFSET 0x2D +#define I40E_LINK_BEHAVIOR_WORD_LENGTH 0x1 +#define I40E_LINK_BEHAVIOR_OS_FORCED_ENABLED BIT(0) +#define I40E_LINK_BEHAVIOR_PORT_BIT_LENGTH 4 + i40e_status read_status = I40E_SUCCESS; + u16 sr_emp_sr_settings_ptr = 0; + u16 features_enable = 0; + u16 link_behavior = 0; + bool ret = false; + + read_status = i40e_read_nvm_word(&pf->hw, + I40E_SR_EMP_SR_SETTINGS_PTR, + &sr_emp_sr_settings_ptr); + if (read_status) + goto err_nvm; + read_status = i40e_read_nvm_word(&pf->hw, + sr_emp_sr_settings_ptr + + I40E_FEATURES_ENABLE_PTR, + &features_enable); + if (read_status) + goto err_nvm; + if (I40E_TOTAL_PORT_SHUTDOWN_ENABLED & features_enable) { + read_status = i40e_read_nvm_module_data(&pf->hw, + I40E_SR_EMP_SR_SETTINGS_PTR, + I40E_CURRENT_SETTING_PTR, + I40E_LINK_BEHAVIOR_WORD_OFFSET, + I40E_LINK_BEHAVIOR_WORD_LENGTH, + &link_behavior); + if (read_status) + goto err_nvm; + link_behavior >>= (pf->hw.port * I40E_LINK_BEHAVIOR_PORT_BIT_LENGTH); + ret = I40E_LINK_BEHAVIOR_OS_FORCED_ENABLED & link_behavior; + } + return ret; + +err_nvm: + dev_warn(&pf->pdev->dev, + "total-port-shutdown feature is off due to read nvm error: %s\n", + i40e_stat_str(&pf->hw, read_status)); + return ret; +} + +/** * i40e_sw_init - Initialize general software structures (struct i40e_pf) * @pf: board private structure to initialize * @@ -11405,16 +12206,15 @@ /* IWARP needs one extra vector for CQP just like MISC.*/ pf->num_iwarp_msix = (int)num_online_cpus() + 1; } - /* Stopping the FW LLDP engine is only supported on the - * XL710 with a FW ver >= 1.7. Also, stopping FW LLDP - * engine is not supported if NPAR is functioning on this - * part + /* Stopping FW LLDP engine is supported on XL710 and X722 + * starting from FW versions determined in i40e_init_adminq. + * Stopping the FW LLDP engine is not supported on XL710 + * if NPAR is functioning so unset this hw flag in this case. */ if (pf->hw.mac.type == I40E_MAC_XL710 && - !pf->hw.func_caps.npar_enable && - (pf->hw.aq.api_maj_ver > 1 || - (pf->hw.aq.api_maj_ver == 1 && pf->hw.aq.api_min_ver > 6))) - pf->hw_features |= I40E_HW_STOPPABLE_FW_LLDP; + pf->hw.func_caps.npar_enable && + (pf->hw.flags & I40E_HW_FLAG_FW_LLDP_STOPPABLE)) + pf->hw.flags &= ~I40E_HW_FLAG_FW_LLDP_STOPPABLE; #ifdef CONFIG_PCI_IOV if (pf->hw.func_caps.num_vfs && pf->hw.partition_id == 1) { @@ -11444,6 +12244,16 @@ pf->tx_timeout_recovery_level = 1; + if (pf->hw.mac.type != I40E_MAC_X722 && + i40e_is_total_port_shutdown_enabled(pf)) { + /* Link down on close must be on when total port shutdown + * is enabled for a given port + */ + pf->flags |= (I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED | + I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED); + dev_info(&pf->pdev->dev, + "total-port-shutdown was enabled, link-down-on-close is forced on\n"); + } mutex_init(&pf->switch_mutex); sw_init_done: @@ -11550,6 +12360,9 @@ return -EINVAL; } + if (!(features & NETIF_F_HW_L2FW_DOFFLOAD) && vsi->macvlan_cnt) + i40e_del_all_macvlans(vsi); + need_reset = i40e_set_ntuple(pf, features); if (need_reset) @@ -11558,131 +12371,48 @@ return 0; } -/** - * i40e_get_udp_port_idx - Lookup a possibly offloaded for Rx UDP port - * @pf: board private structure - * @port: The UDP port to look up - * - * Returns the index number or I40E_MAX_PF_UDP_OFFLOAD_PORTS if port not found - **/ -static u8 i40e_get_udp_port_idx(struct i40e_pf *pf, u16 port) -{ - u8 i; - - for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) { - /* Do not report ports with pending deletions as - * being available. - */ - if (!port && (pf->pending_udp_bitmap & BIT_ULL(i))) - continue; - if (pf->udp_ports[i].port == port) - return i; - } - - return i; -} - -/** - * i40e_udp_tunnel_add - Get notifications about UDP tunnel ports that come up - * @netdev: This physical port's netdev - * @ti: Tunnel endpoint information - **/ -static void i40e_udp_tunnel_add(struct net_device *netdev, - struct udp_tunnel_info *ti) +static int i40e_udp_tunnel_set_port(struct net_device *netdev, + unsigned int table, unsigned int idx, + struct udp_tunnel_info *ti) { struct i40e_netdev_priv *np = netdev_priv(netdev); - struct i40e_vsi *vsi = np->vsi; - struct i40e_pf *pf = vsi->back; - u16 port = ntohs(ti->port); - u8 next_idx; - u8 idx; + struct i40e_hw *hw = &np->vsi->back->hw; + u8 type, filter_index; + i40e_status ret; - idx = i40e_get_udp_port_idx(pf, port); + type = ti->type == UDP_TUNNEL_TYPE_VXLAN ? I40E_AQC_TUNNEL_TYPE_VXLAN : + I40E_AQC_TUNNEL_TYPE_NGE; - /* Check if port already exists */ - if (idx < I40E_MAX_PF_UDP_OFFLOAD_PORTS) { - netdev_info(netdev, "port %d already offloaded\n", port); - return; + ret = i40e_aq_add_udp_tunnel(hw, ntohs(ti->port), type, &filter_index, + NULL); + if (ret) { + netdev_info(netdev, "add UDP port failed, err %s aq_err %s\n", + i40e_stat_str(hw, ret), + i40e_aq_str(hw, hw->aq.asq_last_status)); + return -EIO; } - /* Now check if there is space to add the new port */ - next_idx = i40e_get_udp_port_idx(pf, 0); - - if (next_idx == I40E_MAX_PF_UDP_OFFLOAD_PORTS) { - netdev_info(netdev, "maximum number of offloaded UDP ports reached, not adding port %d\n", - port); - return; - } - - switch (ti->type) { - case UDP_TUNNEL_TYPE_VXLAN: - pf->udp_ports[next_idx].type = I40E_AQC_TUNNEL_TYPE_VXLAN; - break; - case UDP_TUNNEL_TYPE_GENEVE: - if (!(pf->hw_features & I40E_HW_GENEVE_OFFLOAD_CAPABLE)) - return; - pf->udp_ports[next_idx].type = I40E_AQC_TUNNEL_TYPE_NGE; - break; - default: - return; - } - - /* New port: add it and mark its index in the bitmap */ - pf->udp_ports[next_idx].port = port; - pf->udp_ports[next_idx].filter_index = I40E_UDP_PORT_INDEX_UNUSED; - pf->pending_udp_bitmap |= BIT_ULL(next_idx); - set_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state); + udp_tunnel_nic_set_port_priv(netdev, table, idx, filter_index); + return 0; } -/** - * i40e_udp_tunnel_del - Get notifications about UDP tunnel ports that go away - * @netdev: This physical port's netdev - * @ti: Tunnel endpoint information - **/ -static void i40e_udp_tunnel_del(struct net_device *netdev, - struct udp_tunnel_info *ti) +static int i40e_udp_tunnel_unset_port(struct net_device *netdev, + unsigned int table, unsigned int idx, + struct udp_tunnel_info *ti) { struct i40e_netdev_priv *np = netdev_priv(netdev); - struct i40e_vsi *vsi = np->vsi; - struct i40e_pf *pf = vsi->back; - u16 port = ntohs(ti->port); - u8 idx; + struct i40e_hw *hw = &np->vsi->back->hw; + i40e_status ret; - idx = i40e_get_udp_port_idx(pf, port); - - /* Check if port already exists */ - if (idx >= I40E_MAX_PF_UDP_OFFLOAD_PORTS) - goto not_found; - - switch (ti->type) { - case UDP_TUNNEL_TYPE_VXLAN: - if (pf->udp_ports[idx].type != I40E_AQC_TUNNEL_TYPE_VXLAN) - goto not_found; - break; - case UDP_TUNNEL_TYPE_GENEVE: - if (pf->udp_ports[idx].type != I40E_AQC_TUNNEL_TYPE_NGE) - goto not_found; - break; - default: - goto not_found; + ret = i40e_aq_del_udp_tunnel(hw, ti->hw_priv, NULL); + if (ret) { + netdev_info(netdev, "delete UDP port failed, err %s aq_err %s\n", + i40e_stat_str(hw, ret), + i40e_aq_str(hw, hw->aq.asq_last_status)); + return -EIO; } - /* if port exists, set it to 0 (mark for deletion) - * and make it pending - */ - pf->udp_ports[idx].port = 0; - - /* Toggle pending bit instead of setting it. This way if we are - * deleting a port that has yet to be added we just clear the pending - * bit and don't have to worry about it. - */ - pf->pending_udp_bitmap ^= BIT_ULL(idx); - set_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state); - - return; -not_found: - netdev_warn(netdev, "UDP port %d was not found, not deleting\n", - port); + return 0; } static int i40e_get_phys_port_id(struct net_device *netdev, @@ -11709,11 +12439,13 @@ * @addr: the MAC address entry being added * @vid: VLAN ID * @flags: instructions from stack about fdb operation + * @extack: netlink extended ack, unused currently */ static int i40e_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, - u16 flags) + u16 flags, + struct netlink_ext_ack *extack) { struct i40e_netdev_priv *np = netdev_priv(dev); struct i40e_pf *pf = np->vsi->back; @@ -11754,6 +12486,7 @@ * @dev: the netdev being configured * @nlh: RTNL message * @flags: bridge flags + * @extack: netlink extended ack * * Inserts a new hardware bridge if not already created and * enables the bridging mode requested (VEB or VEPA). If the @@ -11766,7 +12499,8 @@ **/ static int i40e_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, - u16 flags) + u16 flags, + struct netlink_ext_ack *extack) { struct i40e_netdev_priv *np = netdev_priv(dev); struct i40e_vsi *vsi = np->vsi; @@ -11953,8 +12687,20 @@ old_prog = xchg(&vsi->xdp_prog, prog); - if (need_reset) + if (need_reset) { + if (!prog) + /* Wait until ndo_xsk_wakeup completes. */ + synchronize_rcu(); i40e_reset_and_rebuild(pf, true, true); + } + + if (!i40e_enabled_xdp_vsi(vsi) && prog) { + if (i40e_realloc_rx_bi_zc(vsi, true)) + return -ENOMEM; + } else if (i40e_enabled_xdp_vsi(vsi) && !prog) { + if (i40e_realloc_rx_bi_zc(vsi, false)) + return -ENOMEM; + } for (i = 0; i < vsi->num_queue_pairs; i++) WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog); @@ -11962,7 +12708,272 @@ if (old_prog) bpf_prog_put(old_prog); + /* Kick start the NAPI context if there is an AF_XDP socket open + * on that queue id. This so that receiving will start. + */ + if (need_reset && prog) + for (i = 0; i < vsi->num_queue_pairs; i++) + if (vsi->xdp_rings[i]->xsk_pool) + (void)i40e_xsk_wakeup(vsi->netdev, i, + XDP_WAKEUP_RX); + return 0; +} + +/** + * i40e_enter_busy_conf - Enters busy config state + * @vsi: vsi + * + * Returns 0 on success, <0 for failure. + **/ +static int i40e_enter_busy_conf(struct i40e_vsi *vsi) +{ + struct i40e_pf *pf = vsi->back; + int timeout = 50; + + while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state)) { + timeout--; + if (!timeout) + return -EBUSY; + usleep_range(1000, 2000); + } + + return 0; +} + +/** + * i40e_exit_busy_conf - Exits busy config state + * @vsi: vsi + **/ +static void i40e_exit_busy_conf(struct i40e_vsi *vsi) +{ + struct i40e_pf *pf = vsi->back; + + clear_bit(__I40E_CONFIG_BUSY, pf->state); +} + +/** + * i40e_queue_pair_reset_stats - Resets all statistics for a queue pair + * @vsi: vsi + * @queue_pair: queue pair + **/ +static void i40e_queue_pair_reset_stats(struct i40e_vsi *vsi, int queue_pair) +{ + memset(&vsi->rx_rings[queue_pair]->rx_stats, 0, + sizeof(vsi->rx_rings[queue_pair]->rx_stats)); + memset(&vsi->tx_rings[queue_pair]->stats, 0, + sizeof(vsi->tx_rings[queue_pair]->stats)); + if (i40e_enabled_xdp_vsi(vsi)) { + memset(&vsi->xdp_rings[queue_pair]->stats, 0, + sizeof(vsi->xdp_rings[queue_pair]->stats)); + } +} + +/** + * i40e_queue_pair_clean_rings - Cleans all the rings of a queue pair + * @vsi: vsi + * @queue_pair: queue pair + **/ +static void i40e_queue_pair_clean_rings(struct i40e_vsi *vsi, int queue_pair) +{ + i40e_clean_tx_ring(vsi->tx_rings[queue_pair]); + if (i40e_enabled_xdp_vsi(vsi)) { + /* Make sure that in-progress ndo_xdp_xmit calls are + * completed. + */ + synchronize_rcu(); + i40e_clean_tx_ring(vsi->xdp_rings[queue_pair]); + } + i40e_clean_rx_ring(vsi->rx_rings[queue_pair]); +} + +/** + * i40e_queue_pair_toggle_napi - Enables/disables NAPI for a queue pair + * @vsi: vsi + * @queue_pair: queue pair + * @enable: true for enable, false for disable + **/ +static void i40e_queue_pair_toggle_napi(struct i40e_vsi *vsi, int queue_pair, + bool enable) +{ + struct i40e_ring *rxr = vsi->rx_rings[queue_pair]; + struct i40e_q_vector *q_vector = rxr->q_vector; + + if (!vsi->netdev) + return; + + /* All rings in a qp belong to the same qvector. */ + if (q_vector->rx.ring || q_vector->tx.ring) { + if (enable) + napi_enable(&q_vector->napi); + else + napi_disable(&q_vector->napi); + } +} + +/** + * i40e_queue_pair_toggle_rings - Enables/disables all rings for a queue pair + * @vsi: vsi + * @queue_pair: queue pair + * @enable: true for enable, false for disable + * + * Returns 0 on success, <0 on failure. + **/ +static int i40e_queue_pair_toggle_rings(struct i40e_vsi *vsi, int queue_pair, + bool enable) +{ + struct i40e_pf *pf = vsi->back; + int pf_q, ret = 0; + + pf_q = vsi->base_queue + queue_pair; + ret = i40e_control_wait_tx_q(vsi->seid, pf, pf_q, + false /*is xdp*/, enable); + if (ret) { + dev_info(&pf->pdev->dev, + "VSI seid %d Tx ring %d %sable timeout\n", + vsi->seid, pf_q, (enable ? "en" : "dis")); + return ret; + } + + i40e_control_rx_q(pf, pf_q, enable); + ret = i40e_pf_rxq_wait(pf, pf_q, enable); + if (ret) { + dev_info(&pf->pdev->dev, + "VSI seid %d Rx ring %d %sable timeout\n", + vsi->seid, pf_q, (enable ? "en" : "dis")); + return ret; + } + + /* Due to HW errata, on Rx disable only, the register can + * indicate done before it really is. Needs 50ms to be sure + */ + if (!enable) + mdelay(50); + + if (!i40e_enabled_xdp_vsi(vsi)) + return ret; + + ret = i40e_control_wait_tx_q(vsi->seid, pf, + pf_q + vsi->alloc_queue_pairs, + true /*is xdp*/, enable); + if (ret) { + dev_info(&pf->pdev->dev, + "VSI seid %d XDP Tx ring %d %sable timeout\n", + vsi->seid, pf_q, (enable ? "en" : "dis")); + } + + return ret; +} + +/** + * i40e_queue_pair_enable_irq - Enables interrupts for a queue pair + * @vsi: vsi + * @queue_pair: queue_pair + **/ +static void i40e_queue_pair_enable_irq(struct i40e_vsi *vsi, int queue_pair) +{ + struct i40e_ring *rxr = vsi->rx_rings[queue_pair]; + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + + /* All rings in a qp belong to the same qvector. */ + if (pf->flags & I40E_FLAG_MSIX_ENABLED) + i40e_irq_dynamic_enable(vsi, rxr->q_vector->v_idx); + else + i40e_irq_dynamic_enable_icr0(pf); + + i40e_flush(hw); +} + +/** + * i40e_queue_pair_disable_irq - Disables interrupts for a queue pair + * @vsi: vsi + * @queue_pair: queue_pair + **/ +static void i40e_queue_pair_disable_irq(struct i40e_vsi *vsi, int queue_pair) +{ + struct i40e_ring *rxr = vsi->rx_rings[queue_pair]; + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + + /* For simplicity, instead of removing the qp interrupt causes + * from the interrupt linked list, we simply disable the interrupt, and + * leave the list intact. + * + * All rings in a qp belong to the same qvector. + */ + if (pf->flags & I40E_FLAG_MSIX_ENABLED) { + u32 intpf = vsi->base_vector + rxr->q_vector->v_idx; + + wr32(hw, I40E_PFINT_DYN_CTLN(intpf - 1), 0); + i40e_flush(hw); + synchronize_irq(pf->msix_entries[intpf].vector); + } else { + /* Legacy and MSI mode - this stops all interrupt handling */ + wr32(hw, I40E_PFINT_ICR0_ENA, 0); + wr32(hw, I40E_PFINT_DYN_CTL0, 0); + i40e_flush(hw); + synchronize_irq(pf->pdev->irq); + } +} + +/** + * i40e_queue_pair_disable - Disables a queue pair + * @vsi: vsi + * @queue_pair: queue pair + * + * Returns 0 on success, <0 on failure. + **/ +int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair) +{ + int err; + + err = i40e_enter_busy_conf(vsi); + if (err) + return err; + + i40e_queue_pair_disable_irq(vsi, queue_pair); + err = i40e_queue_pair_toggle_rings(vsi, queue_pair, false /* off */); + i40e_clean_rx_ring(vsi->rx_rings[queue_pair]); + i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */); + i40e_queue_pair_clean_rings(vsi, queue_pair); + i40e_queue_pair_reset_stats(vsi, queue_pair); + + return err; +} + +/** + * i40e_queue_pair_enable - Enables a queue pair + * @vsi: vsi + * @queue_pair: queue pair + * + * Returns 0 on success, <0 on failure. + **/ +int i40e_queue_pair_enable(struct i40e_vsi *vsi, int queue_pair) +{ + int err; + + err = i40e_configure_tx_ring(vsi->tx_rings[queue_pair]); + if (err) + return err; + + if (i40e_enabled_xdp_vsi(vsi)) { + err = i40e_configure_tx_ring(vsi->xdp_rings[queue_pair]); + if (err) + return err; + } + + err = i40e_configure_rx_ring(vsi->rx_rings[queue_pair]); + if (err) + return err; + + err = i40e_queue_pair_toggle_rings(vsi, queue_pair, true /* on */); + i40e_queue_pair_toggle_napi(vsi, queue_pair, true /* on */); + i40e_queue_pair_enable_irq(vsi, queue_pair); + + i40e_exit_busy_conf(vsi); + + return err; } /** @@ -11982,9 +12993,9 @@ switch (xdp->command) { case XDP_SETUP_PROG: return i40e_xdp_setup(vsi, xdp->prog); - case XDP_QUERY_PROG: - xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0; - return 0; + case XDP_SETUP_XSK_POOL: + return i40e_xsk_pool_setup(vsi, xdp->xsk.pool, + xdp->xsk.queue_id); default: return -EINVAL; } @@ -12007,16 +13018,18 @@ .ndo_poll_controller = i40e_netpoll, #endif .ndo_setup_tc = __i40e_setup_tc, + .ndo_select_queue = i40e_lan_select_queue, .ndo_set_features = i40e_set_features, .ndo_set_vf_mac = i40e_ndo_set_vf_mac, .ndo_set_vf_vlan = i40e_ndo_set_vf_port_vlan, + .ndo_get_vf_stats = i40e_get_vf_stats, .ndo_set_vf_rate = i40e_ndo_set_vf_bw, .ndo_get_vf_config = i40e_ndo_get_vf_config, .ndo_set_vf_link_state = i40e_ndo_set_vf_link_state, .ndo_set_vf_spoofchk = i40e_ndo_set_vf_spoofchk, .ndo_set_vf_trust = i40e_ndo_set_vf_trust, - .ndo_udp_tunnel_add = i40e_udp_tunnel_add, - .ndo_udp_tunnel_del = i40e_udp_tunnel_del, + .ndo_udp_tunnel_add = udp_tunnel_nic_add_port, + .ndo_udp_tunnel_del = udp_tunnel_nic_del_port, .ndo_get_phys_port_id = i40e_get_phys_port_id, .ndo_fdb_add = i40e_ndo_fdb_add, .ndo_features_check = i40e_features_check, @@ -12024,6 +13037,9 @@ .ndo_bridge_setlink = i40e_ndo_bridge_setlink, .ndo_bpf = i40e_xdp, .ndo_xdp_xmit = i40e_xdp_xmit, + .ndo_xsk_wakeup = i40e_xsk_wakeup, + .ndo_dfwd_add_station = i40e_fwd_add, + .ndo_dfwd_del_station = i40e_fwd_del, }; /** @@ -12068,6 +13084,7 @@ NETIF_F_GSO_IPXIP6 | NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_UDP_L4 | NETIF_F_SCTP_CRC | NETIF_F_RXHASH | NETIF_F_RXCSUM | @@ -12076,6 +13093,8 @@ if (!(pf->hw_features & I40E_HW_OUTER_UDP_CSUM_CAPABLE)) netdev->gso_partial_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->udp_tunnel_nic_info = &pf->udp_tunnel_nic; + netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM; netdev->hw_enc_features |= hw_enc_features; @@ -12083,12 +13102,15 @@ /* record features VLANs can make use of */ netdev->vlan_features |= hw_enc_features | NETIF_F_TSO_MANGLEID; - if (!(pf->flags & I40E_FLAG_MFP_ENABLED)) - netdev->hw_features |= NETIF_F_NTUPLE | NETIF_F_HW_TC; + /* enable macvlan offloads */ + netdev->hw_features |= NETIF_F_HW_L2FW_DOFFLOAD; hw_features = hw_enc_features | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; + + if (!(pf->flags & I40E_FLAG_MFP_ENABLED)) + hw_features |= NETIF_F_NTUPLE | NETIF_F_HW_TC; netdev->hw_features |= hw_features; @@ -12195,7 +13217,7 @@ struct i40e_pf *pf = vsi->back; /* Uplink is not a bridge so default to VEB */ - if (vsi->veb_idx == I40E_NO_VEB) + if (vsi->veb_idx >= I40E_MAX_VEB) return 1; veb = pf->veb[vsi->veb_idx]; @@ -12836,8 +13858,7 @@ /* Setup DCB netlink interface */ i40e_dcbnl_setup(vsi); #endif /* CONFIG_I40E_DCB */ - /* fall through */ - + fallthrough; case I40E_VSI_FDIR: /* set up vectors and rings if needed */ ret = i40e_vsi_setup_vectors(vsi); @@ -12853,7 +13874,6 @@ i40e_vsi_reset_stats(vsi); break; - default: /* no netdev or rings for the other VSI types */ break; @@ -13179,7 +14199,7 @@ for (vsi_idx = 0; vsi_idx < pf->num_alloc_vsi; vsi_idx++) if (pf->vsi[vsi_idx] && pf->vsi[vsi_idx]->seid == vsi_seid) break; - if (vsi_idx >= pf->num_alloc_vsi && vsi_seid != 0) { + if (vsi_idx == pf->num_alloc_vsi && vsi_seid != 0) { dev_info(&pf->pdev->dev, "vsi seid %d not found\n", vsi_seid); return NULL; @@ -13256,7 +14276,7 @@ /* Main VEB? */ if (uplink_seid != pf->mac_seid) break; - if (pf->lan_veb == I40E_NO_VEB) { + if (pf->lan_veb >= I40E_MAX_VEB) { int v; /* find existing or else empty VEB */ @@ -13266,13 +14286,15 @@ break; } } - if (pf->lan_veb == I40E_NO_VEB) { + if (pf->lan_veb >= I40E_MAX_VEB) { v = i40e_veb_mem_alloc(pf); if (v < 0) break; pf->lan_veb = v; } } + if (pf->lan_veb >= I40E_MAX_VEB) + break; pf->veb[pf->lan_veb]->seid = seid; pf->veb[pf->lan_veb]->uplink_seid = pf->mac_seid; @@ -13370,10 +14392,11 @@ * i40e_setup_pf_switch - Setup the HW switch on startup or after reset * @pf: board private structure * @reinit: if the Main VSI needs to re-initialized. + * @lock_acquired: indicates whether or not the lock has been acquired * * Returns 0 on success, negative value on failure **/ -static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit) +static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit, bool lock_acquired) { u16 flags = 0; int ret; @@ -13426,7 +14449,7 @@ /* Set up the PF VSI associated with the PF's main VSI * that is already in the HW switch */ - if (pf->lan_veb != I40E_NO_VEB && pf->veb[pf->lan_veb]) + if (pf->lan_veb < I40E_MAX_VEB && pf->veb[pf->lan_veb]) uplink_seid = pf->veb[pf->lan_veb]->seid; else uplink_seid = pf->mac_seid; @@ -13475,8 +14498,14 @@ i40e_ptp_init(pf); + if (!lock_acquired) + rtnl_lock(); + /* repopulate tunnel port filters */ - i40e_sync_udp_filters(pf); + udp_tunnel_nic_reset_ntf(pf->vsi[pf->lan_vsi]->netdev); + + if (!lock_acquired) + rtnl_unlock(); return ret; } @@ -13626,29 +14655,29 @@ i = snprintf(buf, INFO_STRING_LEN, "Features: PF-id[%d]", hw->pf_id); #ifdef CONFIG_PCI_IOV - i += snprintf(&buf[i], REMAIN(i), " VFs: %d", pf->num_req_vfs); + i += scnprintf(&buf[i], REMAIN(i), " VFs: %d", pf->num_req_vfs); #endif - i += snprintf(&buf[i], REMAIN(i), " VSIs: %d QP: %d", + i += scnprintf(&buf[i], REMAIN(i), " VSIs: %d QP: %d", pf->hw.func_caps.num_vsis, pf->vsi[pf->lan_vsi]->num_queue_pairs); if (pf->flags & I40E_FLAG_RSS_ENABLED) - i += snprintf(&buf[i], REMAIN(i), " RSS"); + i += scnprintf(&buf[i], REMAIN(i), " RSS"); if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) - i += snprintf(&buf[i], REMAIN(i), " FD_ATR"); + i += scnprintf(&buf[i], REMAIN(i), " FD_ATR"); if (pf->flags & I40E_FLAG_FD_SB_ENABLED) { - i += snprintf(&buf[i], REMAIN(i), " FD_SB"); - i += snprintf(&buf[i], REMAIN(i), " NTUPLE"); + i += scnprintf(&buf[i], REMAIN(i), " FD_SB"); + i += scnprintf(&buf[i], REMAIN(i), " NTUPLE"); } if (pf->flags & I40E_FLAG_DCB_CAPABLE) - i += snprintf(&buf[i], REMAIN(i), " DCB"); - i += snprintf(&buf[i], REMAIN(i), " VxLAN"); - i += snprintf(&buf[i], REMAIN(i), " Geneve"); + i += scnprintf(&buf[i], REMAIN(i), " DCB"); + i += scnprintf(&buf[i], REMAIN(i), " VxLAN"); + i += scnprintf(&buf[i], REMAIN(i), " Geneve"); if (pf->flags & I40E_FLAG_PTP) - i += snprintf(&buf[i], REMAIN(i), " PTP"); + i += scnprintf(&buf[i], REMAIN(i), " PTP"); if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED) - i += snprintf(&buf[i], REMAIN(i), " VEB"); + i += scnprintf(&buf[i], REMAIN(i), " VEB"); else - i += snprintf(&buf[i], REMAIN(i), " VEPA"); + i += scnprintf(&buf[i], REMAIN(i), " VEPA"); dev_info(&pf->pdev->dev, "%s\n", buf); kfree(buf); @@ -13669,6 +14698,236 @@ { if (eth_platform_get_mac_address(&pdev->dev, pf->hw.mac.addr)) i40e_get_mac_addr(&pf->hw, pf->hw.mac.addr); +} + +/** + * i40e_set_fec_in_flags - helper function for setting FEC options in flags + * @fec_cfg: FEC option to set in flags + * @flags: ptr to flags in which we set FEC option + **/ +void i40e_set_fec_in_flags(u8 fec_cfg, u32 *flags) +{ + if (fec_cfg & I40E_AQ_SET_FEC_AUTO) + *flags |= I40E_FLAG_RS_FEC | I40E_FLAG_BASE_R_FEC; + if ((fec_cfg & I40E_AQ_SET_FEC_REQUEST_RS) || + (fec_cfg & I40E_AQ_SET_FEC_ABILITY_RS)) { + *flags |= I40E_FLAG_RS_FEC; + *flags &= ~I40E_FLAG_BASE_R_FEC; + } + if ((fec_cfg & I40E_AQ_SET_FEC_REQUEST_KR) || + (fec_cfg & I40E_AQ_SET_FEC_ABILITY_KR)) { + *flags |= I40E_FLAG_BASE_R_FEC; + *flags &= ~I40E_FLAG_RS_FEC; + } + if (fec_cfg == 0) + *flags &= ~(I40E_FLAG_RS_FEC | I40E_FLAG_BASE_R_FEC); +} + +/** + * i40e_check_recovery_mode - check if we are running transition firmware + * @pf: board private structure + * + * Check registers indicating the firmware runs in recovery mode. Sets the + * appropriate driver state. + * + * Returns true if the recovery mode was detected, false otherwise + **/ +static bool i40e_check_recovery_mode(struct i40e_pf *pf) +{ + u32 val = rd32(&pf->hw, I40E_GL_FWSTS); + + if (val & I40E_GL_FWSTS_FWS1B_MASK) { + dev_crit(&pf->pdev->dev, "Firmware recovery mode detected. Limiting functionality.\n"); + dev_crit(&pf->pdev->dev, "Refer to the Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n"); + set_bit(__I40E_RECOVERY_MODE, pf->state); + + return true; + } + if (test_bit(__I40E_RECOVERY_MODE, pf->state)) + dev_info(&pf->pdev->dev, "Please do Power-On Reset to initialize adapter in normal mode with full functionality.\n"); + + return false; +} + +/** + * i40e_pf_loop_reset - perform reset in a loop. + * @pf: board private structure + * + * This function is useful when a NIC is about to enter recovery mode. + * When a NIC's internal data structures are corrupted the NIC's + * firmware is going to enter recovery mode. + * Right after a POR it takes about 7 minutes for firmware to enter + * recovery mode. Until that time a NIC is in some kind of intermediate + * state. After that time period the NIC almost surely enters + * recovery mode. The only way for a driver to detect intermediate + * state is to issue a series of pf-resets and check a return value. + * If a PF reset returns success then the firmware could be in recovery + * mode so the caller of this code needs to check for recovery mode + * if this function returns success. There is a little chance that + * firmware will hang in intermediate state forever. + * Since waiting 7 minutes is quite a lot of time this function waits + * 10 seconds and then gives up by returning an error. + * + * Return 0 on success, negative on failure. + **/ +static i40e_status i40e_pf_loop_reset(struct i40e_pf *pf) +{ + /* wait max 10 seconds for PF reset to succeed */ + const unsigned long time_end = jiffies + 10 * HZ; + + struct i40e_hw *hw = &pf->hw; + i40e_status ret; + + ret = i40e_pf_reset(hw); + while (ret != I40E_SUCCESS && time_before(jiffies, time_end)) { + usleep_range(10000, 20000); + ret = i40e_pf_reset(hw); + } + + if (ret == I40E_SUCCESS) + pf->pfr_count++; + else + dev_info(&pf->pdev->dev, "PF reset failed: %d\n", ret); + + return ret; +} + +/** + * i40e_check_fw_empr - check if FW issued unexpected EMP Reset + * @pf: board private structure + * + * Check FW registers to determine if FW issued unexpected EMP Reset. + * Every time when unexpected EMP Reset occurs the FW increments + * a counter of unexpected EMP Resets. When the counter reaches 10 + * the FW should enter the Recovery mode + * + * Returns true if FW issued unexpected EMP Reset + **/ +static bool i40e_check_fw_empr(struct i40e_pf *pf) +{ + const u32 fw_sts = rd32(&pf->hw, I40E_GL_FWSTS) & + I40E_GL_FWSTS_FWS1B_MASK; + return (fw_sts > I40E_GL_FWSTS_FWS1B_EMPR_0) && + (fw_sts <= I40E_GL_FWSTS_FWS1B_EMPR_10); +} + +/** + * i40e_handle_resets - handle EMP resets and PF resets + * @pf: board private structure + * + * Handle both EMP resets and PF resets and conclude whether there are + * any issues regarding these resets. If there are any issues then + * generate log entry. + * + * Return 0 if NIC is healthy or negative value when there are issues + * with resets + **/ +static i40e_status i40e_handle_resets(struct i40e_pf *pf) +{ + const i40e_status pfr = i40e_pf_loop_reset(pf); + const bool is_empr = i40e_check_fw_empr(pf); + + if (is_empr || pfr != I40E_SUCCESS) + dev_crit(&pf->pdev->dev, "Entering recovery mode due to repeated FW resets. This may take several minutes. Refer to the Intel(R) Ethernet Adapters and Devices User Guide.\n"); + + return is_empr ? I40E_ERR_RESET_FAILED : pfr; +} + +/** + * i40e_init_recovery_mode - initialize subsystems needed in recovery mode + * @pf: board private structure + * @hw: ptr to the hardware info + * + * This function does a minimal setup of all subsystems needed for running + * recovery mode. + * + * Returns 0 on success, negative on failure + **/ +static int i40e_init_recovery_mode(struct i40e_pf *pf, struct i40e_hw *hw) +{ + struct i40e_vsi *vsi; + int err; + int v_idx; + + pci_save_state(pf->pdev); + + /* set up periodic task facility */ + timer_setup(&pf->service_timer, i40e_service_timer, 0); + pf->service_timer_period = HZ; + + INIT_WORK(&pf->service_task, i40e_service_task); + clear_bit(__I40E_SERVICE_SCHED, pf->state); + + err = i40e_init_interrupt_scheme(pf); + if (err) + goto err_switch_setup; + + /* The number of VSIs reported by the FW is the minimum guaranteed + * to us; HW supports far more and we share the remaining pool with + * the other PFs. We allocate space for more than the guarantee with + * the understanding that we might not get them all later. + */ + if (pf->hw.func_caps.num_vsis < I40E_MIN_VSI_ALLOC) + pf->num_alloc_vsi = I40E_MIN_VSI_ALLOC; + else + pf->num_alloc_vsi = pf->hw.func_caps.num_vsis; + + /* Set up the vsi struct and our local tracking of the MAIN PF vsi. */ + pf->vsi = kcalloc(pf->num_alloc_vsi, sizeof(struct i40e_vsi *), + GFP_KERNEL); + if (!pf->vsi) { + err = -ENOMEM; + goto err_switch_setup; + } + + /* We allocate one VSI which is needed as absolute minimum + * in order to register the netdev + */ + v_idx = i40e_vsi_mem_alloc(pf, I40E_VSI_MAIN); + if (v_idx < 0) { + err = v_idx; + goto err_switch_setup; + } + pf->lan_vsi = v_idx; + vsi = pf->vsi[v_idx]; + if (!vsi) { + err = -EFAULT; + goto err_switch_setup; + } + vsi->alloc_queue_pairs = 1; + err = i40e_config_netdev(vsi); + if (err) + goto err_switch_setup; + err = register_netdev(vsi->netdev); + if (err) + goto err_switch_setup; + vsi->netdev_registered = true; + i40e_dbg_pf_init(pf); + + err = i40e_setup_misc_vector_for_recovery_mode(pf); + if (err) + goto err_switch_setup; + + /* tell the firmware that we're starting */ + i40e_send_version(pf); + + /* since everything's happy, start the service_task timer */ + mod_timer(&pf->service_timer, + round_jiffies(jiffies + pf->service_timer_period)); + + return 0; + +err_switch_setup: + i40e_reset_interrupt_capability(pf); + del_timer_sync(&pf->service_timer); + i40e_shutdown_adminq(hw); + iounmap(hw->hw_addr); + pci_disable_pcie_error_reporting(pf->pdev); + pci_release_mem_regions(pf->pdev); + pci_disable_device(pf->pdev); + kfree(pf); + + return err; } /** @@ -13739,7 +14998,17 @@ pf->ioremap_len = min_t(int, pci_resource_len(pdev, 0), I40E_MAX_CSR_SPACE); - + /* We believe that the highest register to read is + * I40E_GLGEN_STAT_CLEAR, so we check if the BAR size + * is not less than that before mapping to prevent a + * kernel panic. + */ + if (pf->ioremap_len < I40E_GLGEN_STAT_CLEAR) { + dev_err(&pdev->dev, "Cannot map registers, bar size 0x%X too small, aborting\n", + pf->ioremap_len); + err = -ENOMEM; + goto err_ioremap; + } hw->hw_addr = ioremap(pci_resource_start(pdev, 0), pf->ioremap_len); if (!hw->hw_addr) { err = -EIO; @@ -13767,6 +15036,7 @@ INIT_LIST_HEAD(&pf->l3_flex_pit_list); INIT_LIST_HEAD(&pf->l4_flex_pit_list); + INIT_LIST_HEAD(&pf->ddp_old_prof); /* set up the locks for the AQ, do this only once in probe * and destroy them only once in remove @@ -13794,12 +15064,19 @@ /* Reset here to make sure all is clean and to define PF 'n' */ i40e_clear_hw(hw); - err = i40e_pf_reset(hw); + + err = i40e_set_mac_type(hw); if (err) { - dev_info(&pdev->dev, "Initial pf_reset failed: %d\n", err); + dev_warn(&pdev->dev, "unidentified MAC or BLANK NVM: %d\n", + err); goto err_pf_reset; } - pf->pfr_count++; + + err = i40e_handle_resets(pf); + if (err) + goto err_pf_reset; + + i40e_check_recovery_mode(pf); hw->aq.num_arq_entries = I40E_AQ_LEN; hw->aq.num_asq_entries = I40E_AQ_LEN; @@ -13825,7 +15102,11 @@ if (err) { if (err == I40E_ERR_FIRMWARE_API_VERSION) dev_info(&pdev->dev, - "The driver for the device stopped because the NVM image is newer than expected. You must install the most recent version of the network driver.\n"); + "The driver for the device stopped because the NVM image v%u.%u is newer than expected v%u.%u. You must install the most recent version of the network driver.\n", + hw->aq.api_maj_ver, + hw->aq.api_min_ver, + I40E_FW_API_VERSION_MAJOR, + I40E_FW_MINOR_VERSION(hw)); else dev_info(&pdev->dev, "The driver for the device stopped because the device firmware failed to init. Try updating your NVM image.\n"); @@ -13834,19 +15115,28 @@ } i40e_get_oem_version(hw); - /* provide nvm, fw, api versions */ - dev_info(&pdev->dev, "fw %d.%d.%05d api %d.%d nvm %s\n", + /* provide nvm, fw, api versions, vendor:device id, subsys vendor:device id */ + dev_info(&pdev->dev, "fw %d.%d.%05d api %d.%d nvm %s [%04x:%04x] [%04x:%04x]\n", hw->aq.fw_maj_ver, hw->aq.fw_min_ver, hw->aq.fw_build, hw->aq.api_maj_ver, hw->aq.api_min_ver, - i40e_nvm_version_str(hw)); + i40e_nvm_version_str(hw), hw->vendor_id, hw->device_id, + hw->subsystem_vendor_id, hw->subsystem_device_id); if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && hw->aq.api_min_ver > I40E_FW_MINOR_VERSION(hw)) - dev_info(&pdev->dev, - "The driver for the device detected a newer version of the NVM image than expected. Please install the most recent version of the network driver.\n"); + dev_dbg(&pdev->dev, + "The driver for the device detected a newer version of the NVM image v%u.%u than v%u.%u.\n", + hw->aq.api_maj_ver, + hw->aq.api_min_ver, + I40E_FW_API_VERSION_MAJOR, + I40E_FW_MINOR_VERSION(hw)); else if (hw->aq.api_maj_ver == 1 && hw->aq.api_min_ver < 4) dev_info(&pdev->dev, - "The driver for the device detected an older version of the NVM image than expected. Please update the NVM image.\n"); + "The driver for the device detected an older version of the NVM image v%u.%u than expected v%u.%u. Please update the NVM image.\n", + hw->aq.api_maj_ver, + hw->aq.api_min_ver, + I40E_FW_API_VERSION_MAJOR, + I40E_FW_MINOR_VERSION(hw)); i40e_verify_eeprom(pf); @@ -13855,6 +15145,7 @@ dev_warn(&pdev->dev, "This device is a pre-production adapter/LOM. Please be aware there may be issues with your hardware. If you are experiencing problems please contact your Intel or hardware representative who provided you with this hardware.\n"); i40e_clear_pxe_mode(hw); + err = i40e_get_capabilities(pf, i40e_aqc_opc_list_func_capabilities); if (err) goto err_adminq_setup; @@ -13864,6 +15155,9 @@ dev_info(&pdev->dev, "sw_init failed: %d\n", err); goto err_sw_init; } + + if (test_bit(__I40E_RECOVERY_MODE, pf->state)) + return i40e_init_recovery_mode(pf, hw); err = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp, hw->func_caps.num_rx_qp, 0, 0); @@ -13885,7 +15179,7 @@ */ if (pf->hw_features & I40E_HW_STOP_FW_LLDP) { dev_info(&pdev->dev, "Stopping firmware LLDP agent.\n"); - i40e_aq_stop_lldp(hw, true, NULL); + i40e_aq_stop_lldp(hw, true, false, NULL); } /* allow a platform config to override the HW addr */ @@ -13904,6 +15198,11 @@ pci_set_drvdata(pdev, pf); pci_save_state(pdev); + + dev_info(&pdev->dev, + (pf->flags & I40E_FLAG_DISABLE_FW_LLDP) ? + "FW LLDP is disabled\n" : + "FW LLDP is enabled\n"); /* Enable FW to write default DCB config on link-up */ i40e_aq_set_dcb_parameters(hw, true, NULL); @@ -13938,6 +15237,14 @@ if (err) goto err_switch_setup; + pf->udp_tunnel_nic.set_port = i40e_udp_tunnel_set_port; + pf->udp_tunnel_nic.unset_port = i40e_udp_tunnel_unset_port; + pf->udp_tunnel_nic.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP; + pf->udp_tunnel_nic.shared = &pf->udp_tunnel_shared; + pf->udp_tunnel_nic.tables[0].n_entries = I40E_MAX_PF_UDP_OFFLOAD_PORTS; + pf->udp_tunnel_nic.tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN | + UDP_TUNNEL_TYPE_GENEVE; + /* The number of VSIs reported by the FW is the minimum guaranteed * to us; HW supports far more and we share the remaining pool with * the other PFs. We allocate space for more than the guarantee with @@ -13947,6 +15254,12 @@ pf->num_alloc_vsi = I40E_MIN_VSI_ALLOC; else pf->num_alloc_vsi = pf->hw.func_caps.num_vsis; + if (pf->num_alloc_vsi > UDP_TUNNEL_NIC_MAX_SHARING_DEVICES) { + dev_warn(&pf->pdev->dev, + "limiting the VSI count due to UDP tunnel limitation %d > %d\n", + pf->num_alloc_vsi, UDP_TUNNEL_NIC_MAX_SHARING_DEVICES); + pf->num_alloc_vsi = UDP_TUNNEL_NIC_MAX_SHARING_DEVICES; + } /* Set up the *vsi struct and our local tracking of the MAIN PF vsi. */ pf->vsi = kcalloc(pf->num_alloc_vsi, sizeof(struct i40e_vsi *), @@ -13965,7 +15278,7 @@ pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; } #endif - err = i40e_setup_pf_switch(pf, false); + err = i40e_setup_pf_switch(pf, false, false); if (err) { dev_info(&pdev->dev, "setup_pf_switch failed: %d\n", err); goto err_vsis; @@ -14106,23 +15419,23 @@ switch (hw->bus.speed) { case i40e_bus_speed_8000: - strncpy(speed, "8.0", PCI_SPEED_SIZE); break; + strlcpy(speed, "8.0", PCI_SPEED_SIZE); break; case i40e_bus_speed_5000: - strncpy(speed, "5.0", PCI_SPEED_SIZE); break; + strlcpy(speed, "5.0", PCI_SPEED_SIZE); break; case i40e_bus_speed_2500: - strncpy(speed, "2.5", PCI_SPEED_SIZE); break; + strlcpy(speed, "2.5", PCI_SPEED_SIZE); break; default: break; } switch (hw->bus.width) { case i40e_bus_width_pcie_x8: - strncpy(width, "8", PCI_WIDTH_SIZE); break; + strlcpy(width, "8", PCI_WIDTH_SIZE); break; case i40e_bus_width_pcie_x4: - strncpy(width, "4", PCI_WIDTH_SIZE); break; + strlcpy(width, "4", PCI_WIDTH_SIZE); break; case i40e_bus_width_pcie_x2: - strncpy(width, "2", PCI_WIDTH_SIZE); break; + strlcpy(width, "2", PCI_WIDTH_SIZE); break; case i40e_bus_width_pcie_x1: - strncpy(width, "1", PCI_WIDTH_SIZE); break; + strlcpy(width, "1", PCI_WIDTH_SIZE); break; default: break; } @@ -14145,12 +15458,23 @@ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); pf->hw.phy.link_info.requested_speeds = abilities.link_speed; + /* set the FEC config due to the board capabilities */ + i40e_set_fec_in_flags(abilities.fec_cfg_curr_mod_ext_info, &pf->flags); + /* get the supported phy types from the fw */ err = i40e_aq_get_phy_capabilities(hw, false, true, &abilities, NULL); if (err) dev_dbg(&pf->pdev->dev, "get supported phy types ret = %s last_status = %s\n", i40e_stat_str(&pf->hw, err), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + + /* make sure the MFS hasn't been set lower than the default */ +#define MAX_FRAME_SIZE_DEFAULT 0x2600 + val = (rd32(&pf->hw, I40E_PRTGL_SAH) & + I40E_PRTGL_SAH_MFS_MASK) >> I40E_PRTGL_SAH_MFS_SHIFT; + if (val < MAX_FRAME_SIZE_DEFAULT) + dev_warn(&pdev->dev, "MFS for port %x has been set below the default: %x\n", + i, val); /* Add a filter to drop all Flow control frames from any VSI from being * transmitted. By doing so we stop a malicious VF from sending out @@ -14239,6 +15563,19 @@ if (pf->service_task.func) cancel_work_sync(&pf->service_task); + if (test_bit(__I40E_RECOVERY_MODE, pf->state)) { + struct i40e_vsi *vsi = pf->vsi[0]; + + /* We know that we have allocated only one vsi for this PF, + * it was just for registering netdevice, so the interface + * could be visible in the 'ifconfig' output + */ + unregister_netdev(vsi->netdev); + free_netdev(vsi->netdev); + + goto unmap; + } + /* Client close must be called explicitly here because the timer * has been stopped. */ @@ -14283,6 +15620,12 @@ ret_code); } +unmap: + /* Free MSI/legacy interrupt 0 when in recovery mode. */ + if (test_bit(__I40E_RECOVERY_MODE, pf->state) && + !(pf->flags & I40E_FLAG_MSIX_ENABLED)) + free_irq(pf->pdev->irq, pf); + /* shutdown the adminq */ i40e_shutdown_adminq(hw); @@ -14295,7 +15638,8 @@ i40e_clear_interrupt_scheme(pf); for (i = 0; i < pf->num_alloc_vsi; i++) { if (pf->vsi[i]) { - i40e_vsi_clear_rings(pf->vsi[i]); + if (!test_bit(__I40E_RECOVERY_MODE, pf->state)) + i40e_vsi_clear_rings(pf->vsi[i]); i40e_vsi_clear(pf->vsi[i]); pf->vsi[i] = NULL; } @@ -14328,7 +15672,7 @@ * remediation. **/ static pci_ers_result_t i40e_pci_error_detected(struct pci_dev *pdev, - enum pci_channel_state error) + pci_channel_state_t error) { struct i40e_pf *pf = pci_get_drvdata(pdev); @@ -14361,7 +15705,6 @@ { struct i40e_pf *pf = pci_get_drvdata(pdev); pci_ers_result_t result; - int err; u32 reg; dev_dbg(&pdev->dev, "%s\n", __func__); @@ -14380,14 +15723,6 @@ result = PCI_ERS_RESULT_RECOVERED; else result = PCI_ERS_RESULT_DISCONNECT; - } - - err = pci_cleanup_aer_uncorrect_error_status(pdev); - if (err) { - dev_info(&pdev->dev, - "pci_cleanup_aer_uncorrect_error_status failed 0x%0x\n", - err); - /* non-fatal, continue */ } return result; @@ -14512,6 +15847,11 @@ wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0)); + /* Free MSI/legacy interrupt 0 when in recovery mode. */ + if (test_bit(__I40E_RECOVERY_MODE, pf->state) && + !(pf->flags & I40E_FLAG_MSIX_ENABLED)) + free_irq(pf->pdev->irq, pf); + /* Since we're going to destroy queues during the * i40e_clear_interrupt_scheme() we should hold the RTNL lock for this * whole section @@ -14532,8 +15872,7 @@ **/ static int __maybe_unused i40e_suspend(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); - struct i40e_pf *pf = pci_get_drvdata(pdev); + struct i40e_pf *pf = dev_get_drvdata(dev); struct i40e_hw *hw = &pf->hw; /* If we're already suspended, then there is nothing to do */ @@ -14583,8 +15922,7 @@ **/ static int __maybe_unused i40e_resume(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); - struct i40e_pf *pf = pci_get_drvdata(pdev); + struct i40e_pf *pf = dev_get_drvdata(dev); int err; /* If we're not suspended, then there is nothing to do */ @@ -14601,7 +15939,7 @@ */ err = i40e_restore_interrupt_scheme(pf); if (err) { - dev_err(&pdev->dev, "Cannot restore interrupt scheme: %d\n", + dev_err(dev, "Cannot restore interrupt scheme: %d\n", err); } @@ -14651,8 +15989,9 @@ **/ static int __init i40e_init_module(void) { - pr_info("%s: %s - version %s\n", i40e_driver_name, - i40e_driver_string, i40e_driver_version_str); + int err; + + pr_info("%s: %s\n", i40e_driver_name, i40e_driver_string); pr_info("%s: %s\n", i40e_driver_name, i40e_copyright); /* There is no need to throttle the number of active tasks because @@ -14669,7 +16008,14 @@ } i40e_dbg_init(); - return pci_register_driver(&i40e_driver); + err = pci_register_driver(&i40e_driver); + if (err) { + destroy_workqueue(i40e_wq); + i40e_dbg_exit(); + return err; + } + + return 0; } module_init(i40e_init_module); -- Gitblit v1.6.2