From 8d2a02b24d66aa359e83eebc1ed3c0f85367a1cb Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Thu, 16 May 2024 03:11:33 +0000 Subject: [PATCH] AX88772C_eeprom and ax8872c build together --- kernel/drivers/net/hyperv/netvsc_drv.c | 574 ++++++++++++++++++++++++++++++++++++++++++-------------- 1 files changed, 427 insertions(+), 147 deletions(-) diff --git a/kernel/drivers/net/hyperv/netvsc_drv.c b/kernel/drivers/net/hyperv/netvsc_drv.c index 2dff0e1..f2020be 100644 --- a/kernel/drivers/net/hyperv/netvsc_drv.c +++ b/kernel/drivers/net/hyperv/netvsc_drv.c @@ -1,17 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2009, Microsoft Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, see <http://www.gnu.org/licenses/>. * * Authors: * Haiyang Zhang <haiyangz@microsoft.com> @@ -36,6 +25,7 @@ #include <linux/slab.h> #include <linux/rtnetlink.h> #include <linux/netpoll.h> +#include <linux/bpf.h> #include <net/arp.h> #include <net/route.h> @@ -146,7 +136,7 @@ * slave as up. If open fails, then slave will be * still be offline (and not used). */ - ret = dev_open(vf_netdev); + ret = dev_open(vf_netdev, NULL); if (ret) netdev_warn(net, "unable to open slave: %s: %d\n", @@ -246,6 +236,7 @@ ppi->size = ppi_size; ppi->type = pkt_type; + ppi->internal = 0; ppi->ppi_offset = sizeof(struct rndis_per_packet_info); rndis_pkt->per_pkt_info_len += ppi_size; @@ -327,7 +318,7 @@ * If a valid queue has already been assigned, then use that. * Otherwise compute tx queue based on hash and the send table. * - * This is basically similar to default (__netdev_pick_tx) with the added step + * This is basically similar to default (netdev_pick_tx) with the added step * of using the host send_table when no other queue has been assigned. * * TODO support XPS - but get_xps_queue not exported @@ -350,8 +341,7 @@ } static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { struct net_device_context *ndc = netdev_priv(ndev); struct net_device *vf_netdev; @@ -363,10 +353,9 @@ const struct net_device_ops *vf_ops = vf_netdev->netdev_ops; if (vf_ops->ndo_select_queue) - txq = vf_ops->ndo_select_queue(vf_netdev, skb, - sb_dev, fallback); + txq = vf_ops->ndo_select_queue(vf_netdev, skb, sb_dev); else - txq = fallback(vf_netdev, skb, NULL); + txq = netdev_pick_tx(vf_netdev, skb, NULL); /* Record the queue selected by VF so that it can be * used for common case where VF has more queues than @@ -384,32 +373,29 @@ return txq; } -static u32 fill_pg_buf(struct page *page, u32 offset, u32 len, +static u32 fill_pg_buf(unsigned long hvpfn, u32 offset, u32 len, struct hv_page_buffer *pb) { int j = 0; - /* Deal with compund pages by ignoring unused part - * of the page. - */ - page += (offset >> PAGE_SHIFT); - offset &= ~PAGE_MASK; + hvpfn += offset >> HV_HYP_PAGE_SHIFT; + offset = offset & ~HV_HYP_PAGE_MASK; while (len > 0) { unsigned long bytes; - bytes = PAGE_SIZE - offset; + bytes = HV_HYP_PAGE_SIZE - offset; if (bytes > len) bytes = len; - pb[j].pfn = page_to_pfn(page); + pb[j].pfn = hvpfn; pb[j].offset = offset; pb[j].len = bytes; offset += bytes; len -= bytes; - if (offset == PAGE_SIZE && len) { - page++; + if (offset == HV_HYP_PAGE_SIZE && len) { + hvpfn++; offset = 0; j++; } @@ -432,23 +418,26 @@ * 2. skb linear data * 3. skb fragment data */ - slots_used += fill_pg_buf(virt_to_page(hdr), - offset_in_page(hdr), - len, &pb[slots_used]); + slots_used += fill_pg_buf(virt_to_hvpfn(hdr), + offset_in_hvpage(hdr), + len, + &pb[slots_used]); packet->rmsg_size = len; packet->rmsg_pgcnt = slots_used; - slots_used += fill_pg_buf(virt_to_page(data), - offset_in_page(data), - skb_headlen(skb), &pb[slots_used]); + slots_used += fill_pg_buf(virt_to_hvpfn(data), + offset_in_hvpage(data), + skb_headlen(skb), + &pb[slots_used]); for (i = 0; i < frags; i++) { skb_frag_t *frag = skb_shinfo(skb)->frags + i; - slots_used += fill_pg_buf(skb_frag_page(frag), - frag->page_offset, - skb_frag_size(frag), &pb[slots_used]); + slots_used += fill_pg_buf(page_to_hvpfn(skb_frag_page(frag)), + skb_frag_off(frag), + skb_frag_size(frag), + &pb[slots_used]); } return slots_used; } @@ -461,11 +450,11 @@ for (i = 0; i < frags; i++) { skb_frag_t *frag = skb_shinfo(skb)->frags + i; unsigned long size = skb_frag_size(frag); - unsigned long offset = frag->page_offset; + unsigned long offset = skb_frag_off(frag); /* Skip unused frames from start of page */ - offset &= ~PAGE_MASK; - pages += PFN_UP(offset + size); + offset &= ~HV_HYP_PAGE_MASK; + pages += HVPFN_UP(offset + size); } return pages; } @@ -473,12 +462,12 @@ static int netvsc_get_slots(struct sk_buff *skb) { char *data = skb->data; - unsigned int offset = offset_in_page(data); + unsigned int offset = offset_in_hvpage(data); unsigned int len = skb_headlen(skb); int slots; int frag_slots; - slots = DIV_ROUND_UP(offset + len, PAGE_SIZE); + slots = DIV_ROUND_UP(offset + len, HV_HYP_PAGE_SIZE); frag_slots = count_skb_frag_slots(skb); return slots + frag_slots; } @@ -531,7 +520,7 @@ return rc; } -static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) +static int netvsc_xmit(struct sk_buff *skb, struct net_device *net, bool xdp_tx) { struct net_device_context *net_device_ctx = netdev_priv(net); struct hv_netvsc_packet *packet = NULL; @@ -584,7 +573,7 @@ /* Use the skb control buffer for building up the packet */ BUILD_BUG_ON(sizeof(struct hv_netvsc_packet) > - FIELD_SIZEOF(struct sk_buff, cb)); + sizeof_field(struct sk_buff, cb)); packet = (struct hv_netvsc_packet *)skb->cb; packet->q_idx = skb_get_queue_mapping(skb); @@ -617,6 +606,29 @@ *hash_info = hash; } + /* When using AF_PACKET we need to drop VLAN header from + * the frame and update the SKB to allow the HOST OS + * to transmit the 802.1Q packet + */ + if (skb->protocol == htons(ETH_P_8021Q)) { + u16 vlan_tci; + + skb_reset_mac_header(skb); + if (eth_type_vlan(eth_hdr(skb)->h_proto)) { + if (unlikely(__skb_vlan_pop(skb, &vlan_tci) != 0)) { + ++net_device_ctx->eth_stats.vlan_error; + goto drop; + } + + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci); + /* Update the NDIS header pkt lengths */ + packet->total_data_buflen -= VLAN_HLEN; + packet->total_bytes -= VLAN_HLEN; + rndis_msg->msg_len = packet->total_data_buflen; + rndis_msg->msg.pkt.data_len = packet->total_data_buflen; + } + } + if (skb_vlan_tag_present(skb)) { struct ndis_pkt_8021q_info *vlan; @@ -625,9 +637,9 @@ IEEE_8021Q_INFO); vlan->value = 0; - vlan->vlanid = skb->vlan_tci & VLAN_VID_MASK; - vlan->pri = (skb->vlan_tci & VLAN_PRIO_MASK) >> - VLAN_PRIO_SHIFT; + vlan->vlanid = skb_vlan_tag_get_id(skb); + vlan->cfi = skb_vlan_tag_get_cfi(skb); + vlan->pri = skb_vlan_tag_get_prio(skb); } if (skb_is_gso(skb)) { @@ -650,10 +662,7 @@ } else { lso_info->lso_v2_transmit.ip_version = NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6; - ipv6_hdr(skb)->payload_len = 0; - tcp_hdr(skb)->check = - ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); + tcp_v6_gso_csum_prep(skb); } lso_info->lso_v2_transmit.tcp_header_offset = skb_transport_offset(skb); lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size; @@ -699,7 +708,7 @@ /* timestamp packet in software */ skb_tx_timestamp(skb); - ret = netvsc_send(net, packet, rndis_msg, pb, skb); + ret = netvsc_send(net, packet, rndis_msg, pb, skb, xdp_tx); if (likely(ret == 0)) return NETDEV_TX_OK; @@ -722,6 +731,12 @@ goto drop; } +static netdev_tx_t netvsc_start_xmit(struct sk_buff *skb, + struct net_device *ndev) +{ + return netvsc_xmit(skb, ndev, false); +} + /* * netvsc_linkstatus_callback - Link up/down notification */ @@ -732,6 +747,13 @@ struct net_device_context *ndev_ctx = netdev_priv(net); struct netvsc_reconfig *event; unsigned long flags; + + /* Ensure the packet is big enough to access its fields */ + if (resp->msg_len - RNDIS_HEADER_SIZE < sizeof(struct rndis_indicate_status)) { + netdev_err(net, "invalid rndis_indicate_status packet, len: %u\n", + resp->msg_len); + return; + } /* Update the physical link speed when changing to another vSwitch */ if (indicate->status == RNDIS_STATUS_LINK_SPEED_CHANGE) { @@ -764,6 +786,22 @@ schedule_delayed_work(&ndev_ctx->dwork, 0); } +static void netvsc_xdp_xmit(struct sk_buff *skb, struct net_device *ndev) +{ + int rc; + + skb->queue_mapping = skb_get_rx_queue(skb); + __skb_push(skb, ETH_HLEN); + + rc = netvsc_xmit(skb, ndev, true); + + if (dev_xmit_complete(rc)) + return; + + dev_kfree_skb_any(skb); + ndev->stats.tx_dropped++; +} + static void netvsc_comp_ipcsum(struct sk_buff *skb) { struct iphdr *iph = (struct iphdr *)skb->data; @@ -773,22 +811,46 @@ } static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net, - struct napi_struct *napi, - const struct ndis_tcp_ip_checksum_info *csum_info, - const struct ndis_pkt_8021q_info *vlan, - void *data, u32 buflen) + struct netvsc_channel *nvchan, + struct xdp_buff *xdp) { + struct napi_struct *napi = &nvchan->napi; + const struct ndis_pkt_8021q_info *vlan = nvchan->rsc.vlan; + const struct ndis_tcp_ip_checksum_info *csum_info = + nvchan->rsc.csum_info; + const u32 *hash_info = nvchan->rsc.hash_info; struct sk_buff *skb; + void *xbuf = xdp->data_hard_start; + int i; - skb = napi_alloc_skb(napi, buflen); - if (!skb) - return skb; + if (xbuf) { + unsigned int hdroom = xdp->data - xdp->data_hard_start; + unsigned int xlen = xdp->data_end - xdp->data; + unsigned int frag_size = xdp->frame_sz; - /* - * Copy to skb. This copy is needed here since the memory pointed by - * hv_netvsc_packet cannot be deallocated - */ - skb_put_data(skb, data, buflen); + skb = build_skb(xbuf, frag_size); + + if (!skb) { + __free_page(virt_to_page(xbuf)); + return NULL; + } + + skb_reserve(skb, hdroom); + skb_put(skb, xlen); + skb->dev = napi->dev; + } else { + skb = napi_alloc_skb(napi, nvchan->rsc.pktlen); + + if (!skb) + return NULL; + + /* Copy to skb. This copy is needed here since the memory + * pointed by hv_netvsc_packet cannot be deallocated. + */ + for (i = 0; i < nvchan->rsc.cnt; i++) + skb_put_data(skb, nvchan->rsc.data[i], + nvchan->rsc.len[i]); + } skb->protocol = eth_type_trans(skb, net); @@ -812,8 +874,12 @@ skb->ip_summed = CHECKSUM_UNNECESSARY; } + if (hash_info && (net->features & NETIF_F_RXHASH)) + skb_set_hash(skb, *hash_info, PKT_HASH_TYPE_L4); + if (vlan) { - u16 vlan_tci = vlan->vlanid | (vlan->pri << VLAN_PRIO_SHIFT); + u16 vlan_tci = vlan->vlanid | (vlan->pri << VLAN_PRIO_SHIFT) | + (vlan->cfi ? VLAN_CFI_MASK : 0); __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci); @@ -828,23 +894,32 @@ */ int netvsc_recv_callback(struct net_device *net, struct netvsc_device *net_device, - struct vmbus_channel *channel, - void *data, u32 len, - const struct ndis_tcp_ip_checksum_info *csum_info, - const struct ndis_pkt_8021q_info *vlan) + struct netvsc_channel *nvchan) { struct net_device_context *net_device_ctx = netdev_priv(net); + struct vmbus_channel *channel = nvchan->channel; u16 q_idx = channel->offermsg.offer.sub_channel_index; - struct netvsc_channel *nvchan = &net_device->chan_table[q_idx]; struct sk_buff *skb; - struct netvsc_stats *rx_stats; + struct netvsc_stats *rx_stats = &nvchan->rx_stats; + struct xdp_buff xdp; + u32 act; if (net->reg_state != NETREG_REGISTERED) return NVSP_STAT_FAIL; + act = netvsc_run_xdp(net, nvchan, &xdp); + + if (act != XDP_PASS && act != XDP_TX) { + u64_stats_update_begin(&rx_stats->syncp); + rx_stats->xdp_drop++; + u64_stats_update_end(&rx_stats->syncp); + + return NVSP_STAT_SUCCESS; /* consumed by XDP */ + } + /* Allocate a skb - TODO direct I/O to pages? */ - skb = netvsc_alloc_recv_skb(net, &nvchan->napi, - csum_info, vlan, data, len); + skb = netvsc_alloc_recv_skb(net, nvchan, &xdp); + if (unlikely(!skb)) { ++net_device_ctx->eth_stats.rx_no_memory; return NVSP_STAT_FAIL; @@ -857,16 +932,20 @@ * on the synthetic device because modifying the VF device * statistics will not work correctly. */ - rx_stats = &nvchan->rx_stats; u64_stats_update_begin(&rx_stats->syncp); rx_stats->packets++; - rx_stats->bytes += len; + rx_stats->bytes += nvchan->rsc.pktlen; if (skb->pkt_type == PACKET_BROADCAST) ++rx_stats->broadcast; else if (skb->pkt_type == PACKET_MULTICAST) ++rx_stats->multicast; u64_stats_update_end(&rx_stats->syncp); + + if (act == XDP_TX) { + netvsc_xdp_xmit(skb, net); + return NVSP_STAT_SUCCESS; + } napi_gro_receive(&nvchan->napi, skb); return NVSP_STAT_SUCCESS; @@ -894,10 +973,11 @@ /* Alloc struct netvsc_device_info, and initialize it from either existing * struct netvsc_device, or from default values. */ -static struct netvsc_device_info *netvsc_devinfo_get - (struct netvsc_device *nvdev) +static +struct netvsc_device_info *netvsc_devinfo_get(struct netvsc_device *nvdev) { struct netvsc_device_info *dev_info; + struct bpf_prog *prog; dev_info = kzalloc(sizeof(*dev_info), GFP_ATOMIC); @@ -905,6 +985,8 @@ return NULL; if (nvdev) { + ASSERT_RTNL(); + dev_info->num_chn = nvdev->num_chn; dev_info->send_sections = nvdev->send_section_cnt; dev_info->send_section_size = nvdev->send_section_size; @@ -913,6 +995,12 @@ memcpy(dev_info->rss_key, nvdev->extension->rss_key, NETVSC_HASH_KEYLEN); + + prog = netvsc_xdp_get(nvdev); + if (prog) { + bpf_prog_inc(prog); + dev_info->bprog = prog; + } } else { dev_info->num_chn = VRSS_CHANNEL_DEFAULT; dev_info->send_sections = NETVSC_DEFAULT_TX; @@ -922,6 +1010,17 @@ } return dev_info; +} + +/* Free struct netvsc_device_info */ +static void netvsc_devinfo_put(struct netvsc_device_info *dev_info) +{ + if (dev_info->bprog) { + ASSERT_RTNL(); + bpf_prog_put(dev_info->bprog); + } + + kfree(dev_info); } static int netvsc_detach(struct net_device *ndev, @@ -934,6 +1033,8 @@ /* Don't try continuing to try and setup sub channels */ if (cancel_work_sync(&nvdev->subchan_work)) nvdev->num_chn = 1; + + netvsc_xdp_set(ndev, NULL, NULL, nvdev); /* If device was up (receiving) then shutdown */ if (netif_running(ndev)) { @@ -968,7 +1069,8 @@ struct hv_device *hdev = ndev_ctx->device_ctx; struct netvsc_device *nvdev; struct rndis_device *rdev; - int ret; + struct bpf_prog *prog; + int ret = 0; nvdev = rndis_filter_device_add(hdev, dev_info); if (IS_ERR(nvdev)) @@ -984,6 +1086,16 @@ } } + prog = dev_info->bprog; + if (prog) { + bpf_prog_inc(prog); + ret = netvsc_xdp_set(ndev, prog, NULL, nvdev); + if (ret) { + bpf_prog_put(prog); + goto err1; + } + } + /* In any case device is now ready */ nvdev->tx_disable = false; netif_device_attach(ndev); @@ -994,7 +1106,7 @@ if (netif_running(ndev)) { ret = rndis_filter_open(nvdev); if (ret) - goto err; + goto err2; rdev = nvdev->extension; if (!rdev->link_state) @@ -1003,9 +1115,10 @@ return 0; -err: +err2: netif_device_detach(ndev); +err1: rndis_filter_device_remove(hdev, nvdev); return ret; @@ -1055,25 +1168,8 @@ } out: - kfree(device_info); + netvsc_devinfo_put(device_info); return ret; -} - -static bool -netvsc_validate_ethtool_ss_cmd(const struct ethtool_link_ksettings *cmd) -{ - struct ethtool_link_ksettings diff1 = *cmd; - struct ethtool_link_ksettings diff2 = {}; - - diff1.base.speed = 0; - diff1.base.duplex = 0; - /* advertising and cmd are usually set */ - ethtool_link_ksettings_zero_link_mode(&diff1, advertising); - diff1.base.cmd = 0; - /* We set port to PORT_OTHER */ - diff2.base.port = PORT_OTHER; - - return !memcmp(&diff1, &diff2, sizeof(diff1)); } static void netvsc_init_settings(struct net_device *dev) @@ -1084,12 +1180,20 @@ ndc->speed = SPEED_UNKNOWN; ndc->duplex = DUPLEX_FULL; + + dev->features = NETIF_F_LRO; } static int netvsc_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *cmd) { struct net_device_context *ndc = netdev_priv(dev); + struct net_device *vf_netdev; + + vf_netdev = rtnl_dereference(ndc->vf_netdev); + + if (vf_netdev) + return __ethtool_get_link_ksettings(vf_netdev, cmd); cmd->base.speed = ndc->speed; cmd->base.duplex = ndc->duplex; @@ -1102,18 +1206,18 @@ const struct ethtool_link_ksettings *cmd) { struct net_device_context *ndc = netdev_priv(dev); - u32 speed; + struct net_device *vf_netdev = rtnl_dereference(ndc->vf_netdev); - speed = cmd->base.speed; - if (!ethtool_validate_speed(speed) || - !ethtool_validate_duplex(cmd->base.duplex) || - !netvsc_validate_ethtool_ss_cmd(cmd)) - return -EINVAL; + if (vf_netdev) { + if (!vf_netdev->ethtool_ops->set_link_ksettings) + return -EOPNOTSUPP; - ndc->speed = speed; - ndc->duplex = cmd->base.duplex; + return vf_netdev->ethtool_ops->set_link_ksettings(vf_netdev, + cmd); + } - return 0; + return ethtool_virtdev_set_link_ksettings(dev, cmd, + &ndc->speed, &ndc->duplex); } static int netvsc_change_mtu(struct net_device *ndev, int mtu) @@ -1160,7 +1264,7 @@ dev_set_mtu(vf_netdev, orig_mtu); out: - kfree(device_info); + netvsc_devinfo_put(device_info); return ret; } @@ -1323,7 +1427,7 @@ return -ENODEV; if (vf_netdev) { - err = dev_set_mac_address(vf_netdev, addr); + err = dev_set_mac_address(vf_netdev, addr, NULL); if (err) return err; } @@ -1334,7 +1438,7 @@ } else if (vf_netdev) { /* rollback change on VF */ memcpy(addr->sa_data, ndev->dev_addr, ETH_ALEN); - dev_set_mac_address(vf_netdev, addr); + dev_set_mac_address(vf_netdev, addr, NULL); } return err; @@ -1354,6 +1458,7 @@ { "rx_no_memory", offsetof(struct netvsc_ethtool_stats, rx_no_memory) }, { "stop_queue", offsetof(struct netvsc_ethtool_stats, stop_queue) }, { "wake_queue", offsetof(struct netvsc_ethtool_stats, wake_queue) }, + { "vlan_error", offsetof(struct netvsc_ethtool_stats, vlan_error) }, }, pcpu_stats[] = { { "cpu%u_rx_packets", offsetof(struct netvsc_ethtool_pcpu_stats, rx_packets) }, @@ -1385,8 +1490,8 @@ /* statistics per queue (rx/tx packets/bytes) */ #define NETVSC_PCPU_STATS_LEN (num_present_cpus() * ARRAY_SIZE(pcpu_stats)) -/* 4 statistics per queue (rx/tx packets/bytes) */ -#define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 4) +/* 5 statistics per queue (rx/tx packets/bytes, rx xdp_drop) */ +#define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 5) static int netvsc_get_sset_count(struct net_device *dev, int string_set) { @@ -1418,6 +1523,7 @@ struct netvsc_ethtool_pcpu_stats *pcpu_sum; unsigned int start; u64 packets, bytes; + u64 xdp_drop; int i, j, cpu; if (!nvdev) @@ -1446,14 +1552,19 @@ start = u64_stats_fetch_begin_irq(&qstats->syncp); packets = qstats->packets; bytes = qstats->bytes; + xdp_drop = qstats->xdp_drop; } while (u64_stats_fetch_retry_irq(&qstats->syncp, start)); data[i++] = packets; data[i++] = bytes; + data[i++] = xdp_drop; } pcpu_sum = kvmalloc_array(num_possible_cpus(), sizeof(struct netvsc_ethtool_pcpu_stats), GFP_KERNEL); + if (!pcpu_sum) + return; + netvsc_get_pcpu_stats(dev, pcpu_sum); for_each_present_cpu(cpu) { struct netvsc_ethtool_pcpu_stats *this_sum = &pcpu_sum[cpu]; @@ -1495,6 +1606,8 @@ sprintf(p, "rx_queue_%u_packets", i); p += ETH_GSTRING_LEN; sprintf(p, "rx_queue_%u_bytes", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_xdp_drop", i); p += ETH_GSTRING_LEN; } @@ -1641,26 +1754,6 @@ return -EOPNOTSUPP; } - -#ifdef CONFIG_NET_POLL_CONTROLLER -static void netvsc_poll_controller(struct net_device *dev) -{ - struct net_device_context *ndc = netdev_priv(dev); - struct netvsc_device *ndev; - int i; - - rcu_read_lock(); - ndev = rcu_dereference(ndc->nvdev); - if (ndev) { - for (i = 0; i < ndev->num_chn; i++) { - struct netvsc_channel *nvchan = &ndev->chan_table[i]; - - napi_schedule(&nvchan->napi); - } - } - rcu_read_unlock(); -} -#endif static u32 netvsc_get_rxfh_key_size(struct net_device *dev) { @@ -1812,8 +1905,85 @@ } out: - kfree(device_info); + netvsc_devinfo_put(device_info); return ret; +} + +static netdev_features_t netvsc_fix_features(struct net_device *ndev, + netdev_features_t features) +{ + struct net_device_context *ndevctx = netdev_priv(ndev); + struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev); + + if (!nvdev || nvdev->destroy) + return features; + + if ((features & NETIF_F_LRO) && netvsc_xdp_get(nvdev)) { + features ^= NETIF_F_LRO; + netdev_info(ndev, "Skip LRO - unsupported with XDP\n"); + } + + return features; +} + +static int netvsc_set_features(struct net_device *ndev, + netdev_features_t features) +{ + netdev_features_t change = features ^ ndev->features; + struct net_device_context *ndevctx = netdev_priv(ndev); + struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev); + struct net_device *vf_netdev = rtnl_dereference(ndevctx->vf_netdev); + struct ndis_offload_params offloads; + int ret = 0; + + if (!nvdev || nvdev->destroy) + return -ENODEV; + + if (!(change & NETIF_F_LRO)) + goto syncvf; + + memset(&offloads, 0, sizeof(struct ndis_offload_params)); + + if (features & NETIF_F_LRO) { + offloads.rsc_ip_v4 = NDIS_OFFLOAD_PARAMETERS_RSC_ENABLED; + offloads.rsc_ip_v6 = NDIS_OFFLOAD_PARAMETERS_RSC_ENABLED; + } else { + offloads.rsc_ip_v4 = NDIS_OFFLOAD_PARAMETERS_RSC_DISABLED; + offloads.rsc_ip_v6 = NDIS_OFFLOAD_PARAMETERS_RSC_DISABLED; + } + + ret = rndis_filter_set_offload_params(ndev, nvdev, &offloads); + + if (ret) { + features ^= NETIF_F_LRO; + ndev->features = features; + } + +syncvf: + if (!vf_netdev) + return ret; + + vf_netdev->wanted_features = features; + netdev_update_features(vf_netdev); + + return ret; +} + +static int netvsc_get_regs_len(struct net_device *netdev) +{ + return VRSS_SEND_TAB_SIZE * sizeof(u32); +} + +static void netvsc_get_regs(struct net_device *netdev, + struct ethtool_regs *regs, void *p) +{ + struct net_device_context *ndc = netdev_priv(netdev); + u32 *regs_buff = p; + + /* increase the version, if buffer format is changed. */ + regs->version = 1; + + memcpy(regs_buff, ndc->tx_table, VRSS_SEND_TAB_SIZE * sizeof(u32)); } static u32 netvsc_get_msglevel(struct net_device *ndev) @@ -1832,6 +2002,8 @@ static const struct ethtool_ops ethtool_ops = { .get_drvinfo = netvsc_get_drvinfo, + .get_regs_len = netvsc_get_regs_len, + .get_regs = netvsc_get_regs, .get_msglevel = netvsc_get_msglevel, .set_msglevel = netvsc_set_msglevel, .get_link = ethtool_op_get_link, @@ -1859,14 +2031,14 @@ .ndo_start_xmit = netvsc_start_xmit, .ndo_change_rx_flags = netvsc_change_rx_flags, .ndo_set_rx_mode = netvsc_set_rx_mode, + .ndo_fix_features = netvsc_fix_features, + .ndo_set_features = netvsc_set_features, .ndo_change_mtu = netvsc_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = netvsc_set_mac_addr, .ndo_select_queue = netvsc_select_queue, .ndo_get_stats64 = netvsc_get_stats64, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = netvsc_poll_controller, -#endif + .ndo_bpf = netvsc_bpf, }; /* @@ -2073,7 +2245,7 @@ "unable to change mtu to %u\n", ndev->mtu); /* set multicast etc flags on VF */ - dev_change_flags(vf_netdev, ndev->flags | IFF_SLAVE); + dev_change_flags(vf_netdev, ndev->flags | IFF_SLAVE, NULL); /* sync address list from ndev to VF */ netif_addr_lock_bh(ndev); @@ -2082,7 +2254,7 @@ netif_addr_unlock_bh(ndev); if (netif_running(ndev)) { - ret = dev_open(vf_netdev); + ret = dev_open(vf_netdev, NULL); if (ret) netdev_warn(vf_netdev, "unable to open: %d\n", ret); @@ -2118,6 +2290,7 @@ { struct device *parent = vf_netdev->dev.parent; struct net_device_context *ndev_ctx; + struct net_device *ndev; struct pci_dev *pdev; u32 serial; @@ -2144,6 +2317,18 @@ return hv_get_drvdata(ndev_ctx->device_ctx); } + /* Fallback path to check synthetic vf with + * help of mac addr + */ + list_for_each_entry(ndev_ctx, &netvsc_dev_list, list) { + ndev = hv_get_drvdata(ndev_ctx->device_ctx); + if (ether_addr_equal(vf_netdev->perm_addr, ndev->perm_addr)) { + netdev_notice(vf_netdev, + "falling back to mac addr based matching\n"); + return ndev; + } + } + netdev_notice(vf_netdev, "no netdev found for vf serial:%u\n", serial); return NULL; @@ -2153,6 +2338,7 @@ { struct net_device_context *net_device_ctx; struct netvsc_device *netvsc_dev; + struct bpf_prog *prog; struct net_device *ndev; int ret; @@ -2168,7 +2354,7 @@ if (!netvsc_dev || rtnl_dereference(net_device_ctx->vf_netdev)) return NOTIFY_DONE; - /* if syntihetic interface is a different namespace, + /* if synthetic interface is a different namespace, * then move the VF to that namespace; join will be * done again in that context. */ @@ -2193,10 +2379,26 @@ dev_hold(vf_netdev); rcu_assign_pointer(net_device_ctx->vf_netdev, vf_netdev); + + vf_netdev->wanted_features = ndev->features; + netdev_update_features(vf_netdev); + + prog = netvsc_xdp_get(netvsc_dev); + netvsc_vf_setxdp(vf_netdev, prog); + return NOTIFY_OK; } -/* VF up/down change detected, schedule to change data path */ +/* Change the data path when VF UP/DOWN/CHANGE are detected. + * + * Typically a UP or DOWN event is followed by a CHANGE event, so + * net_device_ctx->data_path_is_vf is used to cache the current data path + * to avoid the duplicate call of netvsc_switch_datapath() and the duplicate + * message. + * + * During hibernation, if a VF NIC driver (e.g. mlx5) preserves the network + * interface, there is only the CHANGE event and no UP or DOWN event. + */ static int netvsc_vf_changed(struct net_device *vf_netdev) { struct net_device_context *net_device_ctx; @@ -2212,6 +2414,15 @@ netvsc_dev = rtnl_dereference(net_device_ctx->nvdev); if (!netvsc_dev) return NOTIFY_DONE; + + if (net_device_ctx->data_path_is_vf == vf_is_up) + return NOTIFY_OK; + net_device_ctx->data_path_is_vf = vf_is_up; + + if (vf_is_up && !net_device_ctx->vf_alloc) { + netdev_info(ndev, "Waiting for the VF association from host\n"); + wait_for_completion(&net_device_ctx->vf_add); + } netvsc_switch_datapath(ndev, vf_is_up); netdev_info(ndev, "Data path switched %s VF: %s\n", @@ -2234,6 +2445,9 @@ netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name); + netvsc_vf_setxdp(vf_netdev, NULL); + + reinit_completion(&net_device_ctx->vf_add); netdev_rx_handler_unregister(vf_netdev); netdev_upper_dev_unlink(vf_netdev, ndev); RCU_INIT_POINTER(net_device_ctx->vf_netdev, NULL); @@ -2271,6 +2485,7 @@ INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change); + init_completion(&net_device_ctx->vf_add); spin_lock_init(&net_device_ctx->lock); INIT_LIST_HEAD(&net_device_ctx->reconfig_events); INIT_DELAYED_WORK(&net_device_ctx->vf_takeover, netvsc_vf_setup); @@ -2316,7 +2531,7 @@ * netvsc_probe() can't get rtnl lock and as a result vmbus_onoffer() * -> ... -> device_add() -> ... -> __device_attach() can't get * the device lock, so all the subchannels can't be processed -- - * finally netvsc_subchan_work() hangs for ever. + * finally netvsc_subchan_work() hangs forever. */ rtnl_lock(); @@ -2325,8 +2540,8 @@ /* hw_features computed in rndis_netdev_set_hwcaps() */ net->features = net->hw_features | - NETIF_F_HIGHDMA | NETIF_F_SG | - NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; + NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX; net->vlan_features = net->features; netdev_lockdep_set_classes(net); @@ -2349,14 +2564,14 @@ list_add(&net_device_ctx->list, &netvsc_dev_list); rtnl_unlock(); - kfree(device_info); + netvsc_devinfo_put(device_info); return 0; register_failed: rtnl_unlock(); rndis_filter_device_remove(dev, nvdev); rndis_failed: - kfree(device_info); + netvsc_devinfo_put(device_info); devinfo_failed: free_percpu(net_device_ctx->vf_stats); no_stats: @@ -2384,8 +2599,10 @@ rtnl_lock(); nvdev = rtnl_dereference(ndev_ctx->nvdev); - if (nvdev) + if (nvdev) { cancel_work_sync(&nvdev->subchan_work); + netvsc_xdp_set(net, NULL, NULL, nvdev); + } /* * Call to the vsc driver to let it know that the device is being @@ -2410,6 +2627,66 @@ return 0; } +static int netvsc_suspend(struct hv_device *dev) +{ + struct net_device_context *ndev_ctx; + struct netvsc_device *nvdev; + struct net_device *net; + int ret; + + net = hv_get_drvdata(dev); + + ndev_ctx = netdev_priv(net); + cancel_delayed_work_sync(&ndev_ctx->dwork); + + rtnl_lock(); + + nvdev = rtnl_dereference(ndev_ctx->nvdev); + if (nvdev == NULL) { + ret = -ENODEV; + goto out; + } + + /* Save the current config info */ + ndev_ctx->saved_netvsc_dev_info = netvsc_devinfo_get(nvdev); + if (!ndev_ctx->saved_netvsc_dev_info) { + ret = -ENOMEM; + goto out; + } + ret = netvsc_detach(net, nvdev); +out: + rtnl_unlock(); + + return ret; +} + +static int netvsc_resume(struct hv_device *dev) +{ + struct net_device *net = hv_get_drvdata(dev); + struct net_device_context *net_device_ctx; + struct netvsc_device_info *device_info; + int ret; + + rtnl_lock(); + + net_device_ctx = netdev_priv(net); + + /* Reset the data path to the netvsc NIC before re-opening the vmbus + * channel. Later netvsc_netdev_event() will switch the data path to + * the VF upon the UP or CHANGE event. + */ + net_device_ctx->data_path_is_vf = false; + device_info = net_device_ctx->saved_netvsc_dev_info; + + ret = netvsc_attach(net, device_info); + + netvsc_devinfo_put(device_info); + net_device_ctx->saved_netvsc_dev_info = NULL; + + rtnl_unlock(); + + return ret; +} static const struct hv_vmbus_device_id id_table[] = { /* Network guid */ { HV_NIC_GUID, }, @@ -2424,6 +2701,8 @@ .id_table = id_table, .probe = netvsc_probe, .remove = netvsc_remove, + .suspend = netvsc_suspend, + .resume = netvsc_resume, .driver = { .probe_type = PROBE_FORCE_SYNCHRONOUS, }, @@ -2464,6 +2743,7 @@ return netvsc_unregister_vf(event_dev); case NETDEV_UP: case NETDEV_DOWN: + case NETDEV_CHANGE: return netvsc_vf_changed(event_dev); default: return NOTIFY_DONE; -- Gitblit v1.6.2