From 072de836f53be56a70cecf70b43ae43b7ce17376 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Mon, 11 Dec 2023 10:08:36 +0000
Subject: [PATCH] mk-rootfs.sh
---
kernel/drivers/net/hyperv/netvsc_drv.c | 574 ++++++++++++++++++++++++++++++++++++++++++--------------
1 files changed, 427 insertions(+), 147 deletions(-)
diff --git a/kernel/drivers/net/hyperv/netvsc_drv.c b/kernel/drivers/net/hyperv/netvsc_drv.c
index 2dff0e1..f2020be 100644
--- a/kernel/drivers/net/hyperv/netvsc_drv.c
+++ b/kernel/drivers/net/hyperv/netvsc_drv.c
@@ -1,17 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2009, Microsoft Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
*
* Authors:
* Haiyang Zhang <haiyangz@microsoft.com>
@@ -36,6 +25,7 @@
#include <linux/slab.h>
#include <linux/rtnetlink.h>
#include <linux/netpoll.h>
+#include <linux/bpf.h>
#include <net/arp.h>
#include <net/route.h>
@@ -146,7 +136,7 @@
* slave as up. If open fails, then slave will be
* still be offline (and not used).
*/
- ret = dev_open(vf_netdev);
+ ret = dev_open(vf_netdev, NULL);
if (ret)
netdev_warn(net,
"unable to open slave: %s: %d\n",
@@ -246,6 +236,7 @@
ppi->size = ppi_size;
ppi->type = pkt_type;
+ ppi->internal = 0;
ppi->ppi_offset = sizeof(struct rndis_per_packet_info);
rndis_pkt->per_pkt_info_len += ppi_size;
@@ -327,7 +318,7 @@
* If a valid queue has already been assigned, then use that.
* Otherwise compute tx queue based on hash and the send table.
*
- * This is basically similar to default (__netdev_pick_tx) with the added step
+ * This is basically similar to default (netdev_pick_tx) with the added step
* of using the host send_table when no other queue has been assigned.
*
* TODO support XPS - but get_xps_queue not exported
@@ -350,8 +341,7 @@
}
static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
- struct net_device *sb_dev,
- select_queue_fallback_t fallback)
+ struct net_device *sb_dev)
{
struct net_device_context *ndc = netdev_priv(ndev);
struct net_device *vf_netdev;
@@ -363,10 +353,9 @@
const struct net_device_ops *vf_ops = vf_netdev->netdev_ops;
if (vf_ops->ndo_select_queue)
- txq = vf_ops->ndo_select_queue(vf_netdev, skb,
- sb_dev, fallback);
+ txq = vf_ops->ndo_select_queue(vf_netdev, skb, sb_dev);
else
- txq = fallback(vf_netdev, skb, NULL);
+ txq = netdev_pick_tx(vf_netdev, skb, NULL);
/* Record the queue selected by VF so that it can be
* used for common case where VF has more queues than
@@ -384,32 +373,29 @@
return txq;
}
-static u32 fill_pg_buf(struct page *page, u32 offset, u32 len,
+static u32 fill_pg_buf(unsigned long hvpfn, u32 offset, u32 len,
struct hv_page_buffer *pb)
{
int j = 0;
- /* Deal with compund pages by ignoring unused part
- * of the page.
- */
- page += (offset >> PAGE_SHIFT);
- offset &= ~PAGE_MASK;
+ hvpfn += offset >> HV_HYP_PAGE_SHIFT;
+ offset = offset & ~HV_HYP_PAGE_MASK;
while (len > 0) {
unsigned long bytes;
- bytes = PAGE_SIZE - offset;
+ bytes = HV_HYP_PAGE_SIZE - offset;
if (bytes > len)
bytes = len;
- pb[j].pfn = page_to_pfn(page);
+ pb[j].pfn = hvpfn;
pb[j].offset = offset;
pb[j].len = bytes;
offset += bytes;
len -= bytes;
- if (offset == PAGE_SIZE && len) {
- page++;
+ if (offset == HV_HYP_PAGE_SIZE && len) {
+ hvpfn++;
offset = 0;
j++;
}
@@ -432,23 +418,26 @@
* 2. skb linear data
* 3. skb fragment data
*/
- slots_used += fill_pg_buf(virt_to_page(hdr),
- offset_in_page(hdr),
- len, &pb[slots_used]);
+ slots_used += fill_pg_buf(virt_to_hvpfn(hdr),
+ offset_in_hvpage(hdr),
+ len,
+ &pb[slots_used]);
packet->rmsg_size = len;
packet->rmsg_pgcnt = slots_used;
- slots_used += fill_pg_buf(virt_to_page(data),
- offset_in_page(data),
- skb_headlen(skb), &pb[slots_used]);
+ slots_used += fill_pg_buf(virt_to_hvpfn(data),
+ offset_in_hvpage(data),
+ skb_headlen(skb),
+ &pb[slots_used]);
for (i = 0; i < frags; i++) {
skb_frag_t *frag = skb_shinfo(skb)->frags + i;
- slots_used += fill_pg_buf(skb_frag_page(frag),
- frag->page_offset,
- skb_frag_size(frag), &pb[slots_used]);
+ slots_used += fill_pg_buf(page_to_hvpfn(skb_frag_page(frag)),
+ skb_frag_off(frag),
+ skb_frag_size(frag),
+ &pb[slots_used]);
}
return slots_used;
}
@@ -461,11 +450,11 @@
for (i = 0; i < frags; i++) {
skb_frag_t *frag = skb_shinfo(skb)->frags + i;
unsigned long size = skb_frag_size(frag);
- unsigned long offset = frag->page_offset;
+ unsigned long offset = skb_frag_off(frag);
/* Skip unused frames from start of page */
- offset &= ~PAGE_MASK;
- pages += PFN_UP(offset + size);
+ offset &= ~HV_HYP_PAGE_MASK;
+ pages += HVPFN_UP(offset + size);
}
return pages;
}
@@ -473,12 +462,12 @@
static int netvsc_get_slots(struct sk_buff *skb)
{
char *data = skb->data;
- unsigned int offset = offset_in_page(data);
+ unsigned int offset = offset_in_hvpage(data);
unsigned int len = skb_headlen(skb);
int slots;
int frag_slots;
- slots = DIV_ROUND_UP(offset + len, PAGE_SIZE);
+ slots = DIV_ROUND_UP(offset + len, HV_HYP_PAGE_SIZE);
frag_slots = count_skb_frag_slots(skb);
return slots + frag_slots;
}
@@ -531,7 +520,7 @@
return rc;
}
-static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
+static int netvsc_xmit(struct sk_buff *skb, struct net_device *net, bool xdp_tx)
{
struct net_device_context *net_device_ctx = netdev_priv(net);
struct hv_netvsc_packet *packet = NULL;
@@ -584,7 +573,7 @@
/* Use the skb control buffer for building up the packet */
BUILD_BUG_ON(sizeof(struct hv_netvsc_packet) >
- FIELD_SIZEOF(struct sk_buff, cb));
+ sizeof_field(struct sk_buff, cb));
packet = (struct hv_netvsc_packet *)skb->cb;
packet->q_idx = skb_get_queue_mapping(skb);
@@ -617,6 +606,29 @@
*hash_info = hash;
}
+ /* When using AF_PACKET we need to drop VLAN header from
+ * the frame and update the SKB to allow the HOST OS
+ * to transmit the 802.1Q packet
+ */
+ if (skb->protocol == htons(ETH_P_8021Q)) {
+ u16 vlan_tci;
+
+ skb_reset_mac_header(skb);
+ if (eth_type_vlan(eth_hdr(skb)->h_proto)) {
+ if (unlikely(__skb_vlan_pop(skb, &vlan_tci) != 0)) {
+ ++net_device_ctx->eth_stats.vlan_error;
+ goto drop;
+ }
+
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci);
+ /* Update the NDIS header pkt lengths */
+ packet->total_data_buflen -= VLAN_HLEN;
+ packet->total_bytes -= VLAN_HLEN;
+ rndis_msg->msg_len = packet->total_data_buflen;
+ rndis_msg->msg.pkt.data_len = packet->total_data_buflen;
+ }
+ }
+
if (skb_vlan_tag_present(skb)) {
struct ndis_pkt_8021q_info *vlan;
@@ -625,9 +637,9 @@
IEEE_8021Q_INFO);
vlan->value = 0;
- vlan->vlanid = skb->vlan_tci & VLAN_VID_MASK;
- vlan->pri = (skb->vlan_tci & VLAN_PRIO_MASK) >>
- VLAN_PRIO_SHIFT;
+ vlan->vlanid = skb_vlan_tag_get_id(skb);
+ vlan->cfi = skb_vlan_tag_get_cfi(skb);
+ vlan->pri = skb_vlan_tag_get_prio(skb);
}
if (skb_is_gso(skb)) {
@@ -650,10 +662,7 @@
} else {
lso_info->lso_v2_transmit.ip_version =
NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6;
- ipv6_hdr(skb)->payload_len = 0;
- tcp_hdr(skb)->check =
- ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
- &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
+ tcp_v6_gso_csum_prep(skb);
}
lso_info->lso_v2_transmit.tcp_header_offset = skb_transport_offset(skb);
lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size;
@@ -699,7 +708,7 @@
/* timestamp packet in software */
skb_tx_timestamp(skb);
- ret = netvsc_send(net, packet, rndis_msg, pb, skb);
+ ret = netvsc_send(net, packet, rndis_msg, pb, skb, xdp_tx);
if (likely(ret == 0))
return NETDEV_TX_OK;
@@ -722,6 +731,12 @@
goto drop;
}
+static netdev_tx_t netvsc_start_xmit(struct sk_buff *skb,
+ struct net_device *ndev)
+{
+ return netvsc_xmit(skb, ndev, false);
+}
+
/*
* netvsc_linkstatus_callback - Link up/down notification
*/
@@ -732,6 +747,13 @@
struct net_device_context *ndev_ctx = netdev_priv(net);
struct netvsc_reconfig *event;
unsigned long flags;
+
+ /* Ensure the packet is big enough to access its fields */
+ if (resp->msg_len - RNDIS_HEADER_SIZE < sizeof(struct rndis_indicate_status)) {
+ netdev_err(net, "invalid rndis_indicate_status packet, len: %u\n",
+ resp->msg_len);
+ return;
+ }
/* Update the physical link speed when changing to another vSwitch */
if (indicate->status == RNDIS_STATUS_LINK_SPEED_CHANGE) {
@@ -764,6 +786,22 @@
schedule_delayed_work(&ndev_ctx->dwork, 0);
}
+static void netvsc_xdp_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+ int rc;
+
+ skb->queue_mapping = skb_get_rx_queue(skb);
+ __skb_push(skb, ETH_HLEN);
+
+ rc = netvsc_xmit(skb, ndev, true);
+
+ if (dev_xmit_complete(rc))
+ return;
+
+ dev_kfree_skb_any(skb);
+ ndev->stats.tx_dropped++;
+}
+
static void netvsc_comp_ipcsum(struct sk_buff *skb)
{
struct iphdr *iph = (struct iphdr *)skb->data;
@@ -773,22 +811,46 @@
}
static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net,
- struct napi_struct *napi,
- const struct ndis_tcp_ip_checksum_info *csum_info,
- const struct ndis_pkt_8021q_info *vlan,
- void *data, u32 buflen)
+ struct netvsc_channel *nvchan,
+ struct xdp_buff *xdp)
{
+ struct napi_struct *napi = &nvchan->napi;
+ const struct ndis_pkt_8021q_info *vlan = nvchan->rsc.vlan;
+ const struct ndis_tcp_ip_checksum_info *csum_info =
+ nvchan->rsc.csum_info;
+ const u32 *hash_info = nvchan->rsc.hash_info;
struct sk_buff *skb;
+ void *xbuf = xdp->data_hard_start;
+ int i;
- skb = napi_alloc_skb(napi, buflen);
- if (!skb)
- return skb;
+ if (xbuf) {
+ unsigned int hdroom = xdp->data - xdp->data_hard_start;
+ unsigned int xlen = xdp->data_end - xdp->data;
+ unsigned int frag_size = xdp->frame_sz;
- /*
- * Copy to skb. This copy is needed here since the memory pointed by
- * hv_netvsc_packet cannot be deallocated
- */
- skb_put_data(skb, data, buflen);
+ skb = build_skb(xbuf, frag_size);
+
+ if (!skb) {
+ __free_page(virt_to_page(xbuf));
+ return NULL;
+ }
+
+ skb_reserve(skb, hdroom);
+ skb_put(skb, xlen);
+ skb->dev = napi->dev;
+ } else {
+ skb = napi_alloc_skb(napi, nvchan->rsc.pktlen);
+
+ if (!skb)
+ return NULL;
+
+ /* Copy to skb. This copy is needed here since the memory
+ * pointed by hv_netvsc_packet cannot be deallocated.
+ */
+ for (i = 0; i < nvchan->rsc.cnt; i++)
+ skb_put_data(skb, nvchan->rsc.data[i],
+ nvchan->rsc.len[i]);
+ }
skb->protocol = eth_type_trans(skb, net);
@@ -812,8 +874,12 @@
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
+ if (hash_info && (net->features & NETIF_F_RXHASH))
+ skb_set_hash(skb, *hash_info, PKT_HASH_TYPE_L4);
+
if (vlan) {
- u16 vlan_tci = vlan->vlanid | (vlan->pri << VLAN_PRIO_SHIFT);
+ u16 vlan_tci = vlan->vlanid | (vlan->pri << VLAN_PRIO_SHIFT) |
+ (vlan->cfi ? VLAN_CFI_MASK : 0);
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
vlan_tci);
@@ -828,23 +894,32 @@
*/
int netvsc_recv_callback(struct net_device *net,
struct netvsc_device *net_device,
- struct vmbus_channel *channel,
- void *data, u32 len,
- const struct ndis_tcp_ip_checksum_info *csum_info,
- const struct ndis_pkt_8021q_info *vlan)
+ struct netvsc_channel *nvchan)
{
struct net_device_context *net_device_ctx = netdev_priv(net);
+ struct vmbus_channel *channel = nvchan->channel;
u16 q_idx = channel->offermsg.offer.sub_channel_index;
- struct netvsc_channel *nvchan = &net_device->chan_table[q_idx];
struct sk_buff *skb;
- struct netvsc_stats *rx_stats;
+ struct netvsc_stats *rx_stats = &nvchan->rx_stats;
+ struct xdp_buff xdp;
+ u32 act;
if (net->reg_state != NETREG_REGISTERED)
return NVSP_STAT_FAIL;
+ act = netvsc_run_xdp(net, nvchan, &xdp);
+
+ if (act != XDP_PASS && act != XDP_TX) {
+ u64_stats_update_begin(&rx_stats->syncp);
+ rx_stats->xdp_drop++;
+ u64_stats_update_end(&rx_stats->syncp);
+
+ return NVSP_STAT_SUCCESS; /* consumed by XDP */
+ }
+
/* Allocate a skb - TODO direct I/O to pages? */
- skb = netvsc_alloc_recv_skb(net, &nvchan->napi,
- csum_info, vlan, data, len);
+ skb = netvsc_alloc_recv_skb(net, nvchan, &xdp);
+
if (unlikely(!skb)) {
++net_device_ctx->eth_stats.rx_no_memory;
return NVSP_STAT_FAIL;
@@ -857,16 +932,20 @@
* on the synthetic device because modifying the VF device
* statistics will not work correctly.
*/
- rx_stats = &nvchan->rx_stats;
u64_stats_update_begin(&rx_stats->syncp);
rx_stats->packets++;
- rx_stats->bytes += len;
+ rx_stats->bytes += nvchan->rsc.pktlen;
if (skb->pkt_type == PACKET_BROADCAST)
++rx_stats->broadcast;
else if (skb->pkt_type == PACKET_MULTICAST)
++rx_stats->multicast;
u64_stats_update_end(&rx_stats->syncp);
+
+ if (act == XDP_TX) {
+ netvsc_xdp_xmit(skb, net);
+ return NVSP_STAT_SUCCESS;
+ }
napi_gro_receive(&nvchan->napi, skb);
return NVSP_STAT_SUCCESS;
@@ -894,10 +973,11 @@
/* Alloc struct netvsc_device_info, and initialize it from either existing
* struct netvsc_device, or from default values.
*/
-static struct netvsc_device_info *netvsc_devinfo_get
- (struct netvsc_device *nvdev)
+static
+struct netvsc_device_info *netvsc_devinfo_get(struct netvsc_device *nvdev)
{
struct netvsc_device_info *dev_info;
+ struct bpf_prog *prog;
dev_info = kzalloc(sizeof(*dev_info), GFP_ATOMIC);
@@ -905,6 +985,8 @@
return NULL;
if (nvdev) {
+ ASSERT_RTNL();
+
dev_info->num_chn = nvdev->num_chn;
dev_info->send_sections = nvdev->send_section_cnt;
dev_info->send_section_size = nvdev->send_section_size;
@@ -913,6 +995,12 @@
memcpy(dev_info->rss_key, nvdev->extension->rss_key,
NETVSC_HASH_KEYLEN);
+
+ prog = netvsc_xdp_get(nvdev);
+ if (prog) {
+ bpf_prog_inc(prog);
+ dev_info->bprog = prog;
+ }
} else {
dev_info->num_chn = VRSS_CHANNEL_DEFAULT;
dev_info->send_sections = NETVSC_DEFAULT_TX;
@@ -922,6 +1010,17 @@
}
return dev_info;
+}
+
+/* Free struct netvsc_device_info */
+static void netvsc_devinfo_put(struct netvsc_device_info *dev_info)
+{
+ if (dev_info->bprog) {
+ ASSERT_RTNL();
+ bpf_prog_put(dev_info->bprog);
+ }
+
+ kfree(dev_info);
}
static int netvsc_detach(struct net_device *ndev,
@@ -934,6 +1033,8 @@
/* Don't try continuing to try and setup sub channels */
if (cancel_work_sync(&nvdev->subchan_work))
nvdev->num_chn = 1;
+
+ netvsc_xdp_set(ndev, NULL, NULL, nvdev);
/* If device was up (receiving) then shutdown */
if (netif_running(ndev)) {
@@ -968,7 +1069,8 @@
struct hv_device *hdev = ndev_ctx->device_ctx;
struct netvsc_device *nvdev;
struct rndis_device *rdev;
- int ret;
+ struct bpf_prog *prog;
+ int ret = 0;
nvdev = rndis_filter_device_add(hdev, dev_info);
if (IS_ERR(nvdev))
@@ -984,6 +1086,16 @@
}
}
+ prog = dev_info->bprog;
+ if (prog) {
+ bpf_prog_inc(prog);
+ ret = netvsc_xdp_set(ndev, prog, NULL, nvdev);
+ if (ret) {
+ bpf_prog_put(prog);
+ goto err1;
+ }
+ }
+
/* In any case device is now ready */
nvdev->tx_disable = false;
netif_device_attach(ndev);
@@ -994,7 +1106,7 @@
if (netif_running(ndev)) {
ret = rndis_filter_open(nvdev);
if (ret)
- goto err;
+ goto err2;
rdev = nvdev->extension;
if (!rdev->link_state)
@@ -1003,9 +1115,10 @@
return 0;
-err:
+err2:
netif_device_detach(ndev);
+err1:
rndis_filter_device_remove(hdev, nvdev);
return ret;
@@ -1055,25 +1168,8 @@
}
out:
- kfree(device_info);
+ netvsc_devinfo_put(device_info);
return ret;
-}
-
-static bool
-netvsc_validate_ethtool_ss_cmd(const struct ethtool_link_ksettings *cmd)
-{
- struct ethtool_link_ksettings diff1 = *cmd;
- struct ethtool_link_ksettings diff2 = {};
-
- diff1.base.speed = 0;
- diff1.base.duplex = 0;
- /* advertising and cmd are usually set */
- ethtool_link_ksettings_zero_link_mode(&diff1, advertising);
- diff1.base.cmd = 0;
- /* We set port to PORT_OTHER */
- diff2.base.port = PORT_OTHER;
-
- return !memcmp(&diff1, &diff2, sizeof(diff1));
}
static void netvsc_init_settings(struct net_device *dev)
@@ -1084,12 +1180,20 @@
ndc->speed = SPEED_UNKNOWN;
ndc->duplex = DUPLEX_FULL;
+
+ dev->features = NETIF_F_LRO;
}
static int netvsc_get_link_ksettings(struct net_device *dev,
struct ethtool_link_ksettings *cmd)
{
struct net_device_context *ndc = netdev_priv(dev);
+ struct net_device *vf_netdev;
+
+ vf_netdev = rtnl_dereference(ndc->vf_netdev);
+
+ if (vf_netdev)
+ return __ethtool_get_link_ksettings(vf_netdev, cmd);
cmd->base.speed = ndc->speed;
cmd->base.duplex = ndc->duplex;
@@ -1102,18 +1206,18 @@
const struct ethtool_link_ksettings *cmd)
{
struct net_device_context *ndc = netdev_priv(dev);
- u32 speed;
+ struct net_device *vf_netdev = rtnl_dereference(ndc->vf_netdev);
- speed = cmd->base.speed;
- if (!ethtool_validate_speed(speed) ||
- !ethtool_validate_duplex(cmd->base.duplex) ||
- !netvsc_validate_ethtool_ss_cmd(cmd))
- return -EINVAL;
+ if (vf_netdev) {
+ if (!vf_netdev->ethtool_ops->set_link_ksettings)
+ return -EOPNOTSUPP;
- ndc->speed = speed;
- ndc->duplex = cmd->base.duplex;
+ return vf_netdev->ethtool_ops->set_link_ksettings(vf_netdev,
+ cmd);
+ }
- return 0;
+ return ethtool_virtdev_set_link_ksettings(dev, cmd,
+ &ndc->speed, &ndc->duplex);
}
static int netvsc_change_mtu(struct net_device *ndev, int mtu)
@@ -1160,7 +1264,7 @@
dev_set_mtu(vf_netdev, orig_mtu);
out:
- kfree(device_info);
+ netvsc_devinfo_put(device_info);
return ret;
}
@@ -1323,7 +1427,7 @@
return -ENODEV;
if (vf_netdev) {
- err = dev_set_mac_address(vf_netdev, addr);
+ err = dev_set_mac_address(vf_netdev, addr, NULL);
if (err)
return err;
}
@@ -1334,7 +1438,7 @@
} else if (vf_netdev) {
/* rollback change on VF */
memcpy(addr->sa_data, ndev->dev_addr, ETH_ALEN);
- dev_set_mac_address(vf_netdev, addr);
+ dev_set_mac_address(vf_netdev, addr, NULL);
}
return err;
@@ -1354,6 +1458,7 @@
{ "rx_no_memory", offsetof(struct netvsc_ethtool_stats, rx_no_memory) },
{ "stop_queue", offsetof(struct netvsc_ethtool_stats, stop_queue) },
{ "wake_queue", offsetof(struct netvsc_ethtool_stats, wake_queue) },
+ { "vlan_error", offsetof(struct netvsc_ethtool_stats, vlan_error) },
}, pcpu_stats[] = {
{ "cpu%u_rx_packets",
offsetof(struct netvsc_ethtool_pcpu_stats, rx_packets) },
@@ -1385,8 +1490,8 @@
/* statistics per queue (rx/tx packets/bytes) */
#define NETVSC_PCPU_STATS_LEN (num_present_cpus() * ARRAY_SIZE(pcpu_stats))
-/* 4 statistics per queue (rx/tx packets/bytes) */
-#define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 4)
+/* 5 statistics per queue (rx/tx packets/bytes, rx xdp_drop) */
+#define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 5)
static int netvsc_get_sset_count(struct net_device *dev, int string_set)
{
@@ -1418,6 +1523,7 @@
struct netvsc_ethtool_pcpu_stats *pcpu_sum;
unsigned int start;
u64 packets, bytes;
+ u64 xdp_drop;
int i, j, cpu;
if (!nvdev)
@@ -1446,14 +1552,19 @@
start = u64_stats_fetch_begin_irq(&qstats->syncp);
packets = qstats->packets;
bytes = qstats->bytes;
+ xdp_drop = qstats->xdp_drop;
} while (u64_stats_fetch_retry_irq(&qstats->syncp, start));
data[i++] = packets;
data[i++] = bytes;
+ data[i++] = xdp_drop;
}
pcpu_sum = kvmalloc_array(num_possible_cpus(),
sizeof(struct netvsc_ethtool_pcpu_stats),
GFP_KERNEL);
+ if (!pcpu_sum)
+ return;
+
netvsc_get_pcpu_stats(dev, pcpu_sum);
for_each_present_cpu(cpu) {
struct netvsc_ethtool_pcpu_stats *this_sum = &pcpu_sum[cpu];
@@ -1495,6 +1606,8 @@
sprintf(p, "rx_queue_%u_packets", i);
p += ETH_GSTRING_LEN;
sprintf(p, "rx_queue_%u_bytes", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "rx_queue_%u_xdp_drop", i);
p += ETH_GSTRING_LEN;
}
@@ -1641,26 +1754,6 @@
return -EOPNOTSUPP;
}
-
-#ifdef CONFIG_NET_POLL_CONTROLLER
-static void netvsc_poll_controller(struct net_device *dev)
-{
- struct net_device_context *ndc = netdev_priv(dev);
- struct netvsc_device *ndev;
- int i;
-
- rcu_read_lock();
- ndev = rcu_dereference(ndc->nvdev);
- if (ndev) {
- for (i = 0; i < ndev->num_chn; i++) {
- struct netvsc_channel *nvchan = &ndev->chan_table[i];
-
- napi_schedule(&nvchan->napi);
- }
- }
- rcu_read_unlock();
-}
-#endif
static u32 netvsc_get_rxfh_key_size(struct net_device *dev)
{
@@ -1812,8 +1905,85 @@
}
out:
- kfree(device_info);
+ netvsc_devinfo_put(device_info);
return ret;
+}
+
+static netdev_features_t netvsc_fix_features(struct net_device *ndev,
+ netdev_features_t features)
+{
+ struct net_device_context *ndevctx = netdev_priv(ndev);
+ struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
+
+ if (!nvdev || nvdev->destroy)
+ return features;
+
+ if ((features & NETIF_F_LRO) && netvsc_xdp_get(nvdev)) {
+ features ^= NETIF_F_LRO;
+ netdev_info(ndev, "Skip LRO - unsupported with XDP\n");
+ }
+
+ return features;
+}
+
+static int netvsc_set_features(struct net_device *ndev,
+ netdev_features_t features)
+{
+ netdev_features_t change = features ^ ndev->features;
+ struct net_device_context *ndevctx = netdev_priv(ndev);
+ struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
+ struct net_device *vf_netdev = rtnl_dereference(ndevctx->vf_netdev);
+ struct ndis_offload_params offloads;
+ int ret = 0;
+
+ if (!nvdev || nvdev->destroy)
+ return -ENODEV;
+
+ if (!(change & NETIF_F_LRO))
+ goto syncvf;
+
+ memset(&offloads, 0, sizeof(struct ndis_offload_params));
+
+ if (features & NETIF_F_LRO) {
+ offloads.rsc_ip_v4 = NDIS_OFFLOAD_PARAMETERS_RSC_ENABLED;
+ offloads.rsc_ip_v6 = NDIS_OFFLOAD_PARAMETERS_RSC_ENABLED;
+ } else {
+ offloads.rsc_ip_v4 = NDIS_OFFLOAD_PARAMETERS_RSC_DISABLED;
+ offloads.rsc_ip_v6 = NDIS_OFFLOAD_PARAMETERS_RSC_DISABLED;
+ }
+
+ ret = rndis_filter_set_offload_params(ndev, nvdev, &offloads);
+
+ if (ret) {
+ features ^= NETIF_F_LRO;
+ ndev->features = features;
+ }
+
+syncvf:
+ if (!vf_netdev)
+ return ret;
+
+ vf_netdev->wanted_features = features;
+ netdev_update_features(vf_netdev);
+
+ return ret;
+}
+
+static int netvsc_get_regs_len(struct net_device *netdev)
+{
+ return VRSS_SEND_TAB_SIZE * sizeof(u32);
+}
+
+static void netvsc_get_regs(struct net_device *netdev,
+ struct ethtool_regs *regs, void *p)
+{
+ struct net_device_context *ndc = netdev_priv(netdev);
+ u32 *regs_buff = p;
+
+ /* increase the version, if buffer format is changed. */
+ regs->version = 1;
+
+ memcpy(regs_buff, ndc->tx_table, VRSS_SEND_TAB_SIZE * sizeof(u32));
}
static u32 netvsc_get_msglevel(struct net_device *ndev)
@@ -1832,6 +2002,8 @@
static const struct ethtool_ops ethtool_ops = {
.get_drvinfo = netvsc_get_drvinfo,
+ .get_regs_len = netvsc_get_regs_len,
+ .get_regs = netvsc_get_regs,
.get_msglevel = netvsc_get_msglevel,
.set_msglevel = netvsc_set_msglevel,
.get_link = ethtool_op_get_link,
@@ -1859,14 +2031,14 @@
.ndo_start_xmit = netvsc_start_xmit,
.ndo_change_rx_flags = netvsc_change_rx_flags,
.ndo_set_rx_mode = netvsc_set_rx_mode,
+ .ndo_fix_features = netvsc_fix_features,
+ .ndo_set_features = netvsc_set_features,
.ndo_change_mtu = netvsc_change_mtu,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = netvsc_set_mac_addr,
.ndo_select_queue = netvsc_select_queue,
.ndo_get_stats64 = netvsc_get_stats64,
-#ifdef CONFIG_NET_POLL_CONTROLLER
- .ndo_poll_controller = netvsc_poll_controller,
-#endif
+ .ndo_bpf = netvsc_bpf,
};
/*
@@ -2073,7 +2245,7 @@
"unable to change mtu to %u\n", ndev->mtu);
/* set multicast etc flags on VF */
- dev_change_flags(vf_netdev, ndev->flags | IFF_SLAVE);
+ dev_change_flags(vf_netdev, ndev->flags | IFF_SLAVE, NULL);
/* sync address list from ndev to VF */
netif_addr_lock_bh(ndev);
@@ -2082,7 +2254,7 @@
netif_addr_unlock_bh(ndev);
if (netif_running(ndev)) {
- ret = dev_open(vf_netdev);
+ ret = dev_open(vf_netdev, NULL);
if (ret)
netdev_warn(vf_netdev,
"unable to open: %d\n", ret);
@@ -2118,6 +2290,7 @@
{
struct device *parent = vf_netdev->dev.parent;
struct net_device_context *ndev_ctx;
+ struct net_device *ndev;
struct pci_dev *pdev;
u32 serial;
@@ -2144,6 +2317,18 @@
return hv_get_drvdata(ndev_ctx->device_ctx);
}
+ /* Fallback path to check synthetic vf with
+ * help of mac addr
+ */
+ list_for_each_entry(ndev_ctx, &netvsc_dev_list, list) {
+ ndev = hv_get_drvdata(ndev_ctx->device_ctx);
+ if (ether_addr_equal(vf_netdev->perm_addr, ndev->perm_addr)) {
+ netdev_notice(vf_netdev,
+ "falling back to mac addr based matching\n");
+ return ndev;
+ }
+ }
+
netdev_notice(vf_netdev,
"no netdev found for vf serial:%u\n", serial);
return NULL;
@@ -2153,6 +2338,7 @@
{
struct net_device_context *net_device_ctx;
struct netvsc_device *netvsc_dev;
+ struct bpf_prog *prog;
struct net_device *ndev;
int ret;
@@ -2168,7 +2354,7 @@
if (!netvsc_dev || rtnl_dereference(net_device_ctx->vf_netdev))
return NOTIFY_DONE;
- /* if syntihetic interface is a different namespace,
+ /* if synthetic interface is a different namespace,
* then move the VF to that namespace; join will be
* done again in that context.
*/
@@ -2193,10 +2379,26 @@
dev_hold(vf_netdev);
rcu_assign_pointer(net_device_ctx->vf_netdev, vf_netdev);
+
+ vf_netdev->wanted_features = ndev->features;
+ netdev_update_features(vf_netdev);
+
+ prog = netvsc_xdp_get(netvsc_dev);
+ netvsc_vf_setxdp(vf_netdev, prog);
+
return NOTIFY_OK;
}
-/* VF up/down change detected, schedule to change data path */
+/* Change the data path when VF UP/DOWN/CHANGE are detected.
+ *
+ * Typically a UP or DOWN event is followed by a CHANGE event, so
+ * net_device_ctx->data_path_is_vf is used to cache the current data path
+ * to avoid the duplicate call of netvsc_switch_datapath() and the duplicate
+ * message.
+ *
+ * During hibernation, if a VF NIC driver (e.g. mlx5) preserves the network
+ * interface, there is only the CHANGE event and no UP or DOWN event.
+ */
static int netvsc_vf_changed(struct net_device *vf_netdev)
{
struct net_device_context *net_device_ctx;
@@ -2212,6 +2414,15 @@
netvsc_dev = rtnl_dereference(net_device_ctx->nvdev);
if (!netvsc_dev)
return NOTIFY_DONE;
+
+ if (net_device_ctx->data_path_is_vf == vf_is_up)
+ return NOTIFY_OK;
+ net_device_ctx->data_path_is_vf = vf_is_up;
+
+ if (vf_is_up && !net_device_ctx->vf_alloc) {
+ netdev_info(ndev, "Waiting for the VF association from host\n");
+ wait_for_completion(&net_device_ctx->vf_add);
+ }
netvsc_switch_datapath(ndev, vf_is_up);
netdev_info(ndev, "Data path switched %s VF: %s\n",
@@ -2234,6 +2445,9 @@
netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
+ netvsc_vf_setxdp(vf_netdev, NULL);
+
+ reinit_completion(&net_device_ctx->vf_add);
netdev_rx_handler_unregister(vf_netdev);
netdev_upper_dev_unlink(vf_netdev, ndev);
RCU_INIT_POINTER(net_device_ctx->vf_netdev, NULL);
@@ -2271,6 +2485,7 @@
INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change);
+ init_completion(&net_device_ctx->vf_add);
spin_lock_init(&net_device_ctx->lock);
INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
INIT_DELAYED_WORK(&net_device_ctx->vf_takeover, netvsc_vf_setup);
@@ -2316,7 +2531,7 @@
* netvsc_probe() can't get rtnl lock and as a result vmbus_onoffer()
* -> ... -> device_add() -> ... -> __device_attach() can't get
* the device lock, so all the subchannels can't be processed --
- * finally netvsc_subchan_work() hangs for ever.
+ * finally netvsc_subchan_work() hangs forever.
*/
rtnl_lock();
@@ -2325,8 +2540,8 @@
/* hw_features computed in rndis_netdev_set_hwcaps() */
net->features = net->hw_features |
- NETIF_F_HIGHDMA | NETIF_F_SG |
- NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
+ NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_CTAG_TX |
+ NETIF_F_HW_VLAN_CTAG_RX;
net->vlan_features = net->features;
netdev_lockdep_set_classes(net);
@@ -2349,14 +2564,14 @@
list_add(&net_device_ctx->list, &netvsc_dev_list);
rtnl_unlock();
- kfree(device_info);
+ netvsc_devinfo_put(device_info);
return 0;
register_failed:
rtnl_unlock();
rndis_filter_device_remove(dev, nvdev);
rndis_failed:
- kfree(device_info);
+ netvsc_devinfo_put(device_info);
devinfo_failed:
free_percpu(net_device_ctx->vf_stats);
no_stats:
@@ -2384,8 +2599,10 @@
rtnl_lock();
nvdev = rtnl_dereference(ndev_ctx->nvdev);
- if (nvdev)
+ if (nvdev) {
cancel_work_sync(&nvdev->subchan_work);
+ netvsc_xdp_set(net, NULL, NULL, nvdev);
+ }
/*
* Call to the vsc driver to let it know that the device is being
@@ -2410,6 +2627,66 @@
return 0;
}
+static int netvsc_suspend(struct hv_device *dev)
+{
+ struct net_device_context *ndev_ctx;
+ struct netvsc_device *nvdev;
+ struct net_device *net;
+ int ret;
+
+ net = hv_get_drvdata(dev);
+
+ ndev_ctx = netdev_priv(net);
+ cancel_delayed_work_sync(&ndev_ctx->dwork);
+
+ rtnl_lock();
+
+ nvdev = rtnl_dereference(ndev_ctx->nvdev);
+ if (nvdev == NULL) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ /* Save the current config info */
+ ndev_ctx->saved_netvsc_dev_info = netvsc_devinfo_get(nvdev);
+ if (!ndev_ctx->saved_netvsc_dev_info) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ret = netvsc_detach(net, nvdev);
+out:
+ rtnl_unlock();
+
+ return ret;
+}
+
+static int netvsc_resume(struct hv_device *dev)
+{
+ struct net_device *net = hv_get_drvdata(dev);
+ struct net_device_context *net_device_ctx;
+ struct netvsc_device_info *device_info;
+ int ret;
+
+ rtnl_lock();
+
+ net_device_ctx = netdev_priv(net);
+
+ /* Reset the data path to the netvsc NIC before re-opening the vmbus
+ * channel. Later netvsc_netdev_event() will switch the data path to
+ * the VF upon the UP or CHANGE event.
+ */
+ net_device_ctx->data_path_is_vf = false;
+ device_info = net_device_ctx->saved_netvsc_dev_info;
+
+ ret = netvsc_attach(net, device_info);
+
+ netvsc_devinfo_put(device_info);
+ net_device_ctx->saved_netvsc_dev_info = NULL;
+
+ rtnl_unlock();
+
+ return ret;
+}
static const struct hv_vmbus_device_id id_table[] = {
/* Network guid */
{ HV_NIC_GUID, },
@@ -2424,6 +2701,8 @@
.id_table = id_table,
.probe = netvsc_probe,
.remove = netvsc_remove,
+ .suspend = netvsc_suspend,
+ .resume = netvsc_resume,
.driver = {
.probe_type = PROBE_FORCE_SYNCHRONOUS,
},
@@ -2464,6 +2743,7 @@
return netvsc_unregister_vf(event_dev);
case NETDEV_UP:
case NETDEV_DOWN:
+ case NETDEV_CHANGE:
return netvsc_vf_changed(event_dev);
default:
return NOTIFY_DONE;
--
Gitblit v1.6.2