hc
2024-05-16 8d2a02b24d66aa359e83eebc1ed3c0f85367a1cb
kernel/drivers/net/hyperv/netvsc_drv.c
....@@ -1,17 +1,6 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * Copyright (c) 2009, Microsoft Corporation.
3
- *
4
- * This program is free software; you can redistribute it and/or modify it
5
- * under the terms and conditions of the GNU General Public License,
6
- * version 2, as published by the Free Software Foundation.
7
- *
8
- * This program is distributed in the hope it will be useful, but WITHOUT
9
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11
- * more details.
12
- *
13
- * You should have received a copy of the GNU General Public License along with
14
- * this program; if not, see <http://www.gnu.org/licenses/>.
154 *
165 * Authors:
176 * Haiyang Zhang <haiyangz@microsoft.com>
....@@ -36,6 +25,7 @@
3625 #include <linux/slab.h>
3726 #include <linux/rtnetlink.h>
3827 #include <linux/netpoll.h>
28
+#include <linux/bpf.h>
3929
4030 #include <net/arp.h>
4131 #include <net/route.h>
....@@ -146,7 +136,7 @@
146136 * slave as up. If open fails, then slave will be
147137 * still be offline (and not used).
148138 */
149
- ret = dev_open(vf_netdev);
139
+ ret = dev_open(vf_netdev, NULL);
150140 if (ret)
151141 netdev_warn(net,
152142 "unable to open slave: %s: %d\n",
....@@ -246,6 +236,7 @@
246236
247237 ppi->size = ppi_size;
248238 ppi->type = pkt_type;
239
+ ppi->internal = 0;
249240 ppi->ppi_offset = sizeof(struct rndis_per_packet_info);
250241
251242 rndis_pkt->per_pkt_info_len += ppi_size;
....@@ -327,7 +318,7 @@
327318 * If a valid queue has already been assigned, then use that.
328319 * Otherwise compute tx queue based on hash and the send table.
329320 *
330
- * This is basically similar to default (__netdev_pick_tx) with the added step
321
+ * This is basically similar to default (netdev_pick_tx) with the added step
331322 * of using the host send_table when no other queue has been assigned.
332323 *
333324 * TODO support XPS - but get_xps_queue not exported
....@@ -350,8 +341,7 @@
350341 }
351342
352343 static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
353
- struct net_device *sb_dev,
354
- select_queue_fallback_t fallback)
344
+ struct net_device *sb_dev)
355345 {
356346 struct net_device_context *ndc = netdev_priv(ndev);
357347 struct net_device *vf_netdev;
....@@ -363,10 +353,9 @@
363353 const struct net_device_ops *vf_ops = vf_netdev->netdev_ops;
364354
365355 if (vf_ops->ndo_select_queue)
366
- txq = vf_ops->ndo_select_queue(vf_netdev, skb,
367
- sb_dev, fallback);
356
+ txq = vf_ops->ndo_select_queue(vf_netdev, skb, sb_dev);
368357 else
369
- txq = fallback(vf_netdev, skb, NULL);
358
+ txq = netdev_pick_tx(vf_netdev, skb, NULL);
370359
371360 /* Record the queue selected by VF so that it can be
372361 * used for common case where VF has more queues than
....@@ -384,32 +373,29 @@
384373 return txq;
385374 }
386375
387
-static u32 fill_pg_buf(struct page *page, u32 offset, u32 len,
376
+static u32 fill_pg_buf(unsigned long hvpfn, u32 offset, u32 len,
388377 struct hv_page_buffer *pb)
389378 {
390379 int j = 0;
391380
392
- /* Deal with compund pages by ignoring unused part
393
- * of the page.
394
- */
395
- page += (offset >> PAGE_SHIFT);
396
- offset &= ~PAGE_MASK;
381
+ hvpfn += offset >> HV_HYP_PAGE_SHIFT;
382
+ offset = offset & ~HV_HYP_PAGE_MASK;
397383
398384 while (len > 0) {
399385 unsigned long bytes;
400386
401
- bytes = PAGE_SIZE - offset;
387
+ bytes = HV_HYP_PAGE_SIZE - offset;
402388 if (bytes > len)
403389 bytes = len;
404
- pb[j].pfn = page_to_pfn(page);
390
+ pb[j].pfn = hvpfn;
405391 pb[j].offset = offset;
406392 pb[j].len = bytes;
407393
408394 offset += bytes;
409395 len -= bytes;
410396
411
- if (offset == PAGE_SIZE && len) {
412
- page++;
397
+ if (offset == HV_HYP_PAGE_SIZE && len) {
398
+ hvpfn++;
413399 offset = 0;
414400 j++;
415401 }
....@@ -432,23 +418,26 @@
432418 * 2. skb linear data
433419 * 3. skb fragment data
434420 */
435
- slots_used += fill_pg_buf(virt_to_page(hdr),
436
- offset_in_page(hdr),
437
- len, &pb[slots_used]);
421
+ slots_used += fill_pg_buf(virt_to_hvpfn(hdr),
422
+ offset_in_hvpage(hdr),
423
+ len,
424
+ &pb[slots_used]);
438425
439426 packet->rmsg_size = len;
440427 packet->rmsg_pgcnt = slots_used;
441428
442
- slots_used += fill_pg_buf(virt_to_page(data),
443
- offset_in_page(data),
444
- skb_headlen(skb), &pb[slots_used]);
429
+ slots_used += fill_pg_buf(virt_to_hvpfn(data),
430
+ offset_in_hvpage(data),
431
+ skb_headlen(skb),
432
+ &pb[slots_used]);
445433
446434 for (i = 0; i < frags; i++) {
447435 skb_frag_t *frag = skb_shinfo(skb)->frags + i;
448436
449
- slots_used += fill_pg_buf(skb_frag_page(frag),
450
- frag->page_offset,
451
- skb_frag_size(frag), &pb[slots_used]);
437
+ slots_used += fill_pg_buf(page_to_hvpfn(skb_frag_page(frag)),
438
+ skb_frag_off(frag),
439
+ skb_frag_size(frag),
440
+ &pb[slots_used]);
452441 }
453442 return slots_used;
454443 }
....@@ -461,11 +450,11 @@
461450 for (i = 0; i < frags; i++) {
462451 skb_frag_t *frag = skb_shinfo(skb)->frags + i;
463452 unsigned long size = skb_frag_size(frag);
464
- unsigned long offset = frag->page_offset;
453
+ unsigned long offset = skb_frag_off(frag);
465454
466455 /* Skip unused frames from start of page */
467
- offset &= ~PAGE_MASK;
468
- pages += PFN_UP(offset + size);
456
+ offset &= ~HV_HYP_PAGE_MASK;
457
+ pages += HVPFN_UP(offset + size);
469458 }
470459 return pages;
471460 }
....@@ -473,12 +462,12 @@
473462 static int netvsc_get_slots(struct sk_buff *skb)
474463 {
475464 char *data = skb->data;
476
- unsigned int offset = offset_in_page(data);
465
+ unsigned int offset = offset_in_hvpage(data);
477466 unsigned int len = skb_headlen(skb);
478467 int slots;
479468 int frag_slots;
480469
481
- slots = DIV_ROUND_UP(offset + len, PAGE_SIZE);
470
+ slots = DIV_ROUND_UP(offset + len, HV_HYP_PAGE_SIZE);
482471 frag_slots = count_skb_frag_slots(skb);
483472 return slots + frag_slots;
484473 }
....@@ -531,7 +520,7 @@
531520 return rc;
532521 }
533522
534
-static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
523
+static int netvsc_xmit(struct sk_buff *skb, struct net_device *net, bool xdp_tx)
535524 {
536525 struct net_device_context *net_device_ctx = netdev_priv(net);
537526 struct hv_netvsc_packet *packet = NULL;
....@@ -584,7 +573,7 @@
584573
585574 /* Use the skb control buffer for building up the packet */
586575 BUILD_BUG_ON(sizeof(struct hv_netvsc_packet) >
587
- FIELD_SIZEOF(struct sk_buff, cb));
576
+ sizeof_field(struct sk_buff, cb));
588577 packet = (struct hv_netvsc_packet *)skb->cb;
589578
590579 packet->q_idx = skb_get_queue_mapping(skb);
....@@ -617,6 +606,29 @@
617606 *hash_info = hash;
618607 }
619608
609
+ /* When using AF_PACKET we need to drop VLAN header from
610
+ * the frame and update the SKB to allow the HOST OS
611
+ * to transmit the 802.1Q packet
612
+ */
613
+ if (skb->protocol == htons(ETH_P_8021Q)) {
614
+ u16 vlan_tci;
615
+
616
+ skb_reset_mac_header(skb);
617
+ if (eth_type_vlan(eth_hdr(skb)->h_proto)) {
618
+ if (unlikely(__skb_vlan_pop(skb, &vlan_tci) != 0)) {
619
+ ++net_device_ctx->eth_stats.vlan_error;
620
+ goto drop;
621
+ }
622
+
623
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci);
624
+ /* Update the NDIS header pkt lengths */
625
+ packet->total_data_buflen -= VLAN_HLEN;
626
+ packet->total_bytes -= VLAN_HLEN;
627
+ rndis_msg->msg_len = packet->total_data_buflen;
628
+ rndis_msg->msg.pkt.data_len = packet->total_data_buflen;
629
+ }
630
+ }
631
+
620632 if (skb_vlan_tag_present(skb)) {
621633 struct ndis_pkt_8021q_info *vlan;
622634
....@@ -625,9 +637,9 @@
625637 IEEE_8021Q_INFO);
626638
627639 vlan->value = 0;
628
- vlan->vlanid = skb->vlan_tci & VLAN_VID_MASK;
629
- vlan->pri = (skb->vlan_tci & VLAN_PRIO_MASK) >>
630
- VLAN_PRIO_SHIFT;
640
+ vlan->vlanid = skb_vlan_tag_get_id(skb);
641
+ vlan->cfi = skb_vlan_tag_get_cfi(skb);
642
+ vlan->pri = skb_vlan_tag_get_prio(skb);
631643 }
632644
633645 if (skb_is_gso(skb)) {
....@@ -650,10 +662,7 @@
650662 } else {
651663 lso_info->lso_v2_transmit.ip_version =
652664 NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6;
653
- ipv6_hdr(skb)->payload_len = 0;
654
- tcp_hdr(skb)->check =
655
- ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
656
- &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
665
+ tcp_v6_gso_csum_prep(skb);
657666 }
658667 lso_info->lso_v2_transmit.tcp_header_offset = skb_transport_offset(skb);
659668 lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size;
....@@ -699,7 +708,7 @@
699708 /* timestamp packet in software */
700709 skb_tx_timestamp(skb);
701710
702
- ret = netvsc_send(net, packet, rndis_msg, pb, skb);
711
+ ret = netvsc_send(net, packet, rndis_msg, pb, skb, xdp_tx);
703712 if (likely(ret == 0))
704713 return NETDEV_TX_OK;
705714
....@@ -722,6 +731,12 @@
722731 goto drop;
723732 }
724733
734
+static netdev_tx_t netvsc_start_xmit(struct sk_buff *skb,
735
+ struct net_device *ndev)
736
+{
737
+ return netvsc_xmit(skb, ndev, false);
738
+}
739
+
725740 /*
726741 * netvsc_linkstatus_callback - Link up/down notification
727742 */
....@@ -732,6 +747,13 @@
732747 struct net_device_context *ndev_ctx = netdev_priv(net);
733748 struct netvsc_reconfig *event;
734749 unsigned long flags;
750
+
751
+ /* Ensure the packet is big enough to access its fields */
752
+ if (resp->msg_len - RNDIS_HEADER_SIZE < sizeof(struct rndis_indicate_status)) {
753
+ netdev_err(net, "invalid rndis_indicate_status packet, len: %u\n",
754
+ resp->msg_len);
755
+ return;
756
+ }
735757
736758 /* Update the physical link speed when changing to another vSwitch */
737759 if (indicate->status == RNDIS_STATUS_LINK_SPEED_CHANGE) {
....@@ -764,6 +786,22 @@
764786 schedule_delayed_work(&ndev_ctx->dwork, 0);
765787 }
766788
789
+static void netvsc_xdp_xmit(struct sk_buff *skb, struct net_device *ndev)
790
+{
791
+ int rc;
792
+
793
+ skb->queue_mapping = skb_get_rx_queue(skb);
794
+ __skb_push(skb, ETH_HLEN);
795
+
796
+ rc = netvsc_xmit(skb, ndev, true);
797
+
798
+ if (dev_xmit_complete(rc))
799
+ return;
800
+
801
+ dev_kfree_skb_any(skb);
802
+ ndev->stats.tx_dropped++;
803
+}
804
+
767805 static void netvsc_comp_ipcsum(struct sk_buff *skb)
768806 {
769807 struct iphdr *iph = (struct iphdr *)skb->data;
....@@ -773,22 +811,46 @@
773811 }
774812
775813 static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net,
776
- struct napi_struct *napi,
777
- const struct ndis_tcp_ip_checksum_info *csum_info,
778
- const struct ndis_pkt_8021q_info *vlan,
779
- void *data, u32 buflen)
814
+ struct netvsc_channel *nvchan,
815
+ struct xdp_buff *xdp)
780816 {
817
+ struct napi_struct *napi = &nvchan->napi;
818
+ const struct ndis_pkt_8021q_info *vlan = nvchan->rsc.vlan;
819
+ const struct ndis_tcp_ip_checksum_info *csum_info =
820
+ nvchan->rsc.csum_info;
821
+ const u32 *hash_info = nvchan->rsc.hash_info;
781822 struct sk_buff *skb;
823
+ void *xbuf = xdp->data_hard_start;
824
+ int i;
782825
783
- skb = napi_alloc_skb(napi, buflen);
784
- if (!skb)
785
- return skb;
826
+ if (xbuf) {
827
+ unsigned int hdroom = xdp->data - xdp->data_hard_start;
828
+ unsigned int xlen = xdp->data_end - xdp->data;
829
+ unsigned int frag_size = xdp->frame_sz;
786830
787
- /*
788
- * Copy to skb. This copy is needed here since the memory pointed by
789
- * hv_netvsc_packet cannot be deallocated
790
- */
791
- skb_put_data(skb, data, buflen);
831
+ skb = build_skb(xbuf, frag_size);
832
+
833
+ if (!skb) {
834
+ __free_page(virt_to_page(xbuf));
835
+ return NULL;
836
+ }
837
+
838
+ skb_reserve(skb, hdroom);
839
+ skb_put(skb, xlen);
840
+ skb->dev = napi->dev;
841
+ } else {
842
+ skb = napi_alloc_skb(napi, nvchan->rsc.pktlen);
843
+
844
+ if (!skb)
845
+ return NULL;
846
+
847
+ /* Copy to skb. This copy is needed here since the memory
848
+ * pointed by hv_netvsc_packet cannot be deallocated.
849
+ */
850
+ for (i = 0; i < nvchan->rsc.cnt; i++)
851
+ skb_put_data(skb, nvchan->rsc.data[i],
852
+ nvchan->rsc.len[i]);
853
+ }
792854
793855 skb->protocol = eth_type_trans(skb, net);
794856
....@@ -812,8 +874,12 @@
812874 skb->ip_summed = CHECKSUM_UNNECESSARY;
813875 }
814876
877
+ if (hash_info && (net->features & NETIF_F_RXHASH))
878
+ skb_set_hash(skb, *hash_info, PKT_HASH_TYPE_L4);
879
+
815880 if (vlan) {
816
- u16 vlan_tci = vlan->vlanid | (vlan->pri << VLAN_PRIO_SHIFT);
881
+ u16 vlan_tci = vlan->vlanid | (vlan->pri << VLAN_PRIO_SHIFT) |
882
+ (vlan->cfi ? VLAN_CFI_MASK : 0);
817883
818884 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
819885 vlan_tci);
....@@ -828,23 +894,32 @@
828894 */
829895 int netvsc_recv_callback(struct net_device *net,
830896 struct netvsc_device *net_device,
831
- struct vmbus_channel *channel,
832
- void *data, u32 len,
833
- const struct ndis_tcp_ip_checksum_info *csum_info,
834
- const struct ndis_pkt_8021q_info *vlan)
897
+ struct netvsc_channel *nvchan)
835898 {
836899 struct net_device_context *net_device_ctx = netdev_priv(net);
900
+ struct vmbus_channel *channel = nvchan->channel;
837901 u16 q_idx = channel->offermsg.offer.sub_channel_index;
838
- struct netvsc_channel *nvchan = &net_device->chan_table[q_idx];
839902 struct sk_buff *skb;
840
- struct netvsc_stats *rx_stats;
903
+ struct netvsc_stats *rx_stats = &nvchan->rx_stats;
904
+ struct xdp_buff xdp;
905
+ u32 act;
841906
842907 if (net->reg_state != NETREG_REGISTERED)
843908 return NVSP_STAT_FAIL;
844909
910
+ act = netvsc_run_xdp(net, nvchan, &xdp);
911
+
912
+ if (act != XDP_PASS && act != XDP_TX) {
913
+ u64_stats_update_begin(&rx_stats->syncp);
914
+ rx_stats->xdp_drop++;
915
+ u64_stats_update_end(&rx_stats->syncp);
916
+
917
+ return NVSP_STAT_SUCCESS; /* consumed by XDP */
918
+ }
919
+
845920 /* Allocate a skb - TODO direct I/O to pages? */
846
- skb = netvsc_alloc_recv_skb(net, &nvchan->napi,
847
- csum_info, vlan, data, len);
921
+ skb = netvsc_alloc_recv_skb(net, nvchan, &xdp);
922
+
848923 if (unlikely(!skb)) {
849924 ++net_device_ctx->eth_stats.rx_no_memory;
850925 return NVSP_STAT_FAIL;
....@@ -857,16 +932,20 @@
857932 * on the synthetic device because modifying the VF device
858933 * statistics will not work correctly.
859934 */
860
- rx_stats = &nvchan->rx_stats;
861935 u64_stats_update_begin(&rx_stats->syncp);
862936 rx_stats->packets++;
863
- rx_stats->bytes += len;
937
+ rx_stats->bytes += nvchan->rsc.pktlen;
864938
865939 if (skb->pkt_type == PACKET_BROADCAST)
866940 ++rx_stats->broadcast;
867941 else if (skb->pkt_type == PACKET_MULTICAST)
868942 ++rx_stats->multicast;
869943 u64_stats_update_end(&rx_stats->syncp);
944
+
945
+ if (act == XDP_TX) {
946
+ netvsc_xdp_xmit(skb, net);
947
+ return NVSP_STAT_SUCCESS;
948
+ }
870949
871950 napi_gro_receive(&nvchan->napi, skb);
872951 return NVSP_STAT_SUCCESS;
....@@ -894,10 +973,11 @@
894973 /* Alloc struct netvsc_device_info, and initialize it from either existing
895974 * struct netvsc_device, or from default values.
896975 */
897
-static struct netvsc_device_info *netvsc_devinfo_get
898
- (struct netvsc_device *nvdev)
976
+static
977
+struct netvsc_device_info *netvsc_devinfo_get(struct netvsc_device *nvdev)
899978 {
900979 struct netvsc_device_info *dev_info;
980
+ struct bpf_prog *prog;
901981
902982 dev_info = kzalloc(sizeof(*dev_info), GFP_ATOMIC);
903983
....@@ -905,6 +985,8 @@
905985 return NULL;
906986
907987 if (nvdev) {
988
+ ASSERT_RTNL();
989
+
908990 dev_info->num_chn = nvdev->num_chn;
909991 dev_info->send_sections = nvdev->send_section_cnt;
910992 dev_info->send_section_size = nvdev->send_section_size;
....@@ -913,6 +995,12 @@
913995
914996 memcpy(dev_info->rss_key, nvdev->extension->rss_key,
915997 NETVSC_HASH_KEYLEN);
998
+
999
+ prog = netvsc_xdp_get(nvdev);
1000
+ if (prog) {
1001
+ bpf_prog_inc(prog);
1002
+ dev_info->bprog = prog;
1003
+ }
9161004 } else {
9171005 dev_info->num_chn = VRSS_CHANNEL_DEFAULT;
9181006 dev_info->send_sections = NETVSC_DEFAULT_TX;
....@@ -922,6 +1010,17 @@
9221010 }
9231011
9241012 return dev_info;
1013
+}
1014
+
1015
+/* Free struct netvsc_device_info */
1016
+static void netvsc_devinfo_put(struct netvsc_device_info *dev_info)
1017
+{
1018
+ if (dev_info->bprog) {
1019
+ ASSERT_RTNL();
1020
+ bpf_prog_put(dev_info->bprog);
1021
+ }
1022
+
1023
+ kfree(dev_info);
9251024 }
9261025
9271026 static int netvsc_detach(struct net_device *ndev,
....@@ -934,6 +1033,8 @@
9341033 /* Don't try continuing to try and setup sub channels */
9351034 if (cancel_work_sync(&nvdev->subchan_work))
9361035 nvdev->num_chn = 1;
1036
+
1037
+ netvsc_xdp_set(ndev, NULL, NULL, nvdev);
9371038
9381039 /* If device was up (receiving) then shutdown */
9391040 if (netif_running(ndev)) {
....@@ -968,7 +1069,8 @@
9681069 struct hv_device *hdev = ndev_ctx->device_ctx;
9691070 struct netvsc_device *nvdev;
9701071 struct rndis_device *rdev;
971
- int ret;
1072
+ struct bpf_prog *prog;
1073
+ int ret = 0;
9721074
9731075 nvdev = rndis_filter_device_add(hdev, dev_info);
9741076 if (IS_ERR(nvdev))
....@@ -984,6 +1086,16 @@
9841086 }
9851087 }
9861088
1089
+ prog = dev_info->bprog;
1090
+ if (prog) {
1091
+ bpf_prog_inc(prog);
1092
+ ret = netvsc_xdp_set(ndev, prog, NULL, nvdev);
1093
+ if (ret) {
1094
+ bpf_prog_put(prog);
1095
+ goto err1;
1096
+ }
1097
+ }
1098
+
9871099 /* In any case device is now ready */
9881100 nvdev->tx_disable = false;
9891101 netif_device_attach(ndev);
....@@ -994,7 +1106,7 @@
9941106 if (netif_running(ndev)) {
9951107 ret = rndis_filter_open(nvdev);
9961108 if (ret)
997
- goto err;
1109
+ goto err2;
9981110
9991111 rdev = nvdev->extension;
10001112 if (!rdev->link_state)
....@@ -1003,9 +1115,10 @@
10031115
10041116 return 0;
10051117
1006
-err:
1118
+err2:
10071119 netif_device_detach(ndev);
10081120
1121
+err1:
10091122 rndis_filter_device_remove(hdev, nvdev);
10101123
10111124 return ret;
....@@ -1055,25 +1168,8 @@
10551168 }
10561169
10571170 out:
1058
- kfree(device_info);
1171
+ netvsc_devinfo_put(device_info);
10591172 return ret;
1060
-}
1061
-
1062
-static bool
1063
-netvsc_validate_ethtool_ss_cmd(const struct ethtool_link_ksettings *cmd)
1064
-{
1065
- struct ethtool_link_ksettings diff1 = *cmd;
1066
- struct ethtool_link_ksettings diff2 = {};
1067
-
1068
- diff1.base.speed = 0;
1069
- diff1.base.duplex = 0;
1070
- /* advertising and cmd are usually set */
1071
- ethtool_link_ksettings_zero_link_mode(&diff1, advertising);
1072
- diff1.base.cmd = 0;
1073
- /* We set port to PORT_OTHER */
1074
- diff2.base.port = PORT_OTHER;
1075
-
1076
- return !memcmp(&diff1, &diff2, sizeof(diff1));
10771173 }
10781174
10791175 static void netvsc_init_settings(struct net_device *dev)
....@@ -1084,12 +1180,20 @@
10841180
10851181 ndc->speed = SPEED_UNKNOWN;
10861182 ndc->duplex = DUPLEX_FULL;
1183
+
1184
+ dev->features = NETIF_F_LRO;
10871185 }
10881186
10891187 static int netvsc_get_link_ksettings(struct net_device *dev,
10901188 struct ethtool_link_ksettings *cmd)
10911189 {
10921190 struct net_device_context *ndc = netdev_priv(dev);
1191
+ struct net_device *vf_netdev;
1192
+
1193
+ vf_netdev = rtnl_dereference(ndc->vf_netdev);
1194
+
1195
+ if (vf_netdev)
1196
+ return __ethtool_get_link_ksettings(vf_netdev, cmd);
10931197
10941198 cmd->base.speed = ndc->speed;
10951199 cmd->base.duplex = ndc->duplex;
....@@ -1102,18 +1206,18 @@
11021206 const struct ethtool_link_ksettings *cmd)
11031207 {
11041208 struct net_device_context *ndc = netdev_priv(dev);
1105
- u32 speed;
1209
+ struct net_device *vf_netdev = rtnl_dereference(ndc->vf_netdev);
11061210
1107
- speed = cmd->base.speed;
1108
- if (!ethtool_validate_speed(speed) ||
1109
- !ethtool_validate_duplex(cmd->base.duplex) ||
1110
- !netvsc_validate_ethtool_ss_cmd(cmd))
1111
- return -EINVAL;
1211
+ if (vf_netdev) {
1212
+ if (!vf_netdev->ethtool_ops->set_link_ksettings)
1213
+ return -EOPNOTSUPP;
11121214
1113
- ndc->speed = speed;
1114
- ndc->duplex = cmd->base.duplex;
1215
+ return vf_netdev->ethtool_ops->set_link_ksettings(vf_netdev,
1216
+ cmd);
1217
+ }
11151218
1116
- return 0;
1219
+ return ethtool_virtdev_set_link_ksettings(dev, cmd,
1220
+ &ndc->speed, &ndc->duplex);
11171221 }
11181222
11191223 static int netvsc_change_mtu(struct net_device *ndev, int mtu)
....@@ -1160,7 +1264,7 @@
11601264 dev_set_mtu(vf_netdev, orig_mtu);
11611265
11621266 out:
1163
- kfree(device_info);
1267
+ netvsc_devinfo_put(device_info);
11641268 return ret;
11651269 }
11661270
....@@ -1323,7 +1427,7 @@
13231427 return -ENODEV;
13241428
13251429 if (vf_netdev) {
1326
- err = dev_set_mac_address(vf_netdev, addr);
1430
+ err = dev_set_mac_address(vf_netdev, addr, NULL);
13271431 if (err)
13281432 return err;
13291433 }
....@@ -1334,7 +1438,7 @@
13341438 } else if (vf_netdev) {
13351439 /* rollback change on VF */
13361440 memcpy(addr->sa_data, ndev->dev_addr, ETH_ALEN);
1337
- dev_set_mac_address(vf_netdev, addr);
1441
+ dev_set_mac_address(vf_netdev, addr, NULL);
13381442 }
13391443
13401444 return err;
....@@ -1354,6 +1458,7 @@
13541458 { "rx_no_memory", offsetof(struct netvsc_ethtool_stats, rx_no_memory) },
13551459 { "stop_queue", offsetof(struct netvsc_ethtool_stats, stop_queue) },
13561460 { "wake_queue", offsetof(struct netvsc_ethtool_stats, wake_queue) },
1461
+ { "vlan_error", offsetof(struct netvsc_ethtool_stats, vlan_error) },
13571462 }, pcpu_stats[] = {
13581463 { "cpu%u_rx_packets",
13591464 offsetof(struct netvsc_ethtool_pcpu_stats, rx_packets) },
....@@ -1385,8 +1490,8 @@
13851490 /* statistics per queue (rx/tx packets/bytes) */
13861491 #define NETVSC_PCPU_STATS_LEN (num_present_cpus() * ARRAY_SIZE(pcpu_stats))
13871492
1388
-/* 4 statistics per queue (rx/tx packets/bytes) */
1389
-#define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 4)
1493
+/* 5 statistics per queue (rx/tx packets/bytes, rx xdp_drop) */
1494
+#define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 5)
13901495
13911496 static int netvsc_get_sset_count(struct net_device *dev, int string_set)
13921497 {
....@@ -1418,6 +1523,7 @@
14181523 struct netvsc_ethtool_pcpu_stats *pcpu_sum;
14191524 unsigned int start;
14201525 u64 packets, bytes;
1526
+ u64 xdp_drop;
14211527 int i, j, cpu;
14221528
14231529 if (!nvdev)
....@@ -1446,14 +1552,19 @@
14461552 start = u64_stats_fetch_begin_irq(&qstats->syncp);
14471553 packets = qstats->packets;
14481554 bytes = qstats->bytes;
1555
+ xdp_drop = qstats->xdp_drop;
14491556 } while (u64_stats_fetch_retry_irq(&qstats->syncp, start));
14501557 data[i++] = packets;
14511558 data[i++] = bytes;
1559
+ data[i++] = xdp_drop;
14521560 }
14531561
14541562 pcpu_sum = kvmalloc_array(num_possible_cpus(),
14551563 sizeof(struct netvsc_ethtool_pcpu_stats),
14561564 GFP_KERNEL);
1565
+ if (!pcpu_sum)
1566
+ return;
1567
+
14571568 netvsc_get_pcpu_stats(dev, pcpu_sum);
14581569 for_each_present_cpu(cpu) {
14591570 struct netvsc_ethtool_pcpu_stats *this_sum = &pcpu_sum[cpu];
....@@ -1495,6 +1606,8 @@
14951606 sprintf(p, "rx_queue_%u_packets", i);
14961607 p += ETH_GSTRING_LEN;
14971608 sprintf(p, "rx_queue_%u_bytes", i);
1609
+ p += ETH_GSTRING_LEN;
1610
+ sprintf(p, "rx_queue_%u_xdp_drop", i);
14981611 p += ETH_GSTRING_LEN;
14991612 }
15001613
....@@ -1641,26 +1754,6 @@
16411754
16421755 return -EOPNOTSUPP;
16431756 }
1644
-
1645
-#ifdef CONFIG_NET_POLL_CONTROLLER
1646
-static void netvsc_poll_controller(struct net_device *dev)
1647
-{
1648
- struct net_device_context *ndc = netdev_priv(dev);
1649
- struct netvsc_device *ndev;
1650
- int i;
1651
-
1652
- rcu_read_lock();
1653
- ndev = rcu_dereference(ndc->nvdev);
1654
- if (ndev) {
1655
- for (i = 0; i < ndev->num_chn; i++) {
1656
- struct netvsc_channel *nvchan = &ndev->chan_table[i];
1657
-
1658
- napi_schedule(&nvchan->napi);
1659
- }
1660
- }
1661
- rcu_read_unlock();
1662
-}
1663
-#endif
16641757
16651758 static u32 netvsc_get_rxfh_key_size(struct net_device *dev)
16661759 {
....@@ -1812,8 +1905,85 @@
18121905 }
18131906
18141907 out:
1815
- kfree(device_info);
1908
+ netvsc_devinfo_put(device_info);
18161909 return ret;
1910
+}
1911
+
1912
+static netdev_features_t netvsc_fix_features(struct net_device *ndev,
1913
+ netdev_features_t features)
1914
+{
1915
+ struct net_device_context *ndevctx = netdev_priv(ndev);
1916
+ struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
1917
+
1918
+ if (!nvdev || nvdev->destroy)
1919
+ return features;
1920
+
1921
+ if ((features & NETIF_F_LRO) && netvsc_xdp_get(nvdev)) {
1922
+ features ^= NETIF_F_LRO;
1923
+ netdev_info(ndev, "Skip LRO - unsupported with XDP\n");
1924
+ }
1925
+
1926
+ return features;
1927
+}
1928
+
1929
+static int netvsc_set_features(struct net_device *ndev,
1930
+ netdev_features_t features)
1931
+{
1932
+ netdev_features_t change = features ^ ndev->features;
1933
+ struct net_device_context *ndevctx = netdev_priv(ndev);
1934
+ struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
1935
+ struct net_device *vf_netdev = rtnl_dereference(ndevctx->vf_netdev);
1936
+ struct ndis_offload_params offloads;
1937
+ int ret = 0;
1938
+
1939
+ if (!nvdev || nvdev->destroy)
1940
+ return -ENODEV;
1941
+
1942
+ if (!(change & NETIF_F_LRO))
1943
+ goto syncvf;
1944
+
1945
+ memset(&offloads, 0, sizeof(struct ndis_offload_params));
1946
+
1947
+ if (features & NETIF_F_LRO) {
1948
+ offloads.rsc_ip_v4 = NDIS_OFFLOAD_PARAMETERS_RSC_ENABLED;
1949
+ offloads.rsc_ip_v6 = NDIS_OFFLOAD_PARAMETERS_RSC_ENABLED;
1950
+ } else {
1951
+ offloads.rsc_ip_v4 = NDIS_OFFLOAD_PARAMETERS_RSC_DISABLED;
1952
+ offloads.rsc_ip_v6 = NDIS_OFFLOAD_PARAMETERS_RSC_DISABLED;
1953
+ }
1954
+
1955
+ ret = rndis_filter_set_offload_params(ndev, nvdev, &offloads);
1956
+
1957
+ if (ret) {
1958
+ features ^= NETIF_F_LRO;
1959
+ ndev->features = features;
1960
+ }
1961
+
1962
+syncvf:
1963
+ if (!vf_netdev)
1964
+ return ret;
1965
+
1966
+ vf_netdev->wanted_features = features;
1967
+ netdev_update_features(vf_netdev);
1968
+
1969
+ return ret;
1970
+}
1971
+
1972
+static int netvsc_get_regs_len(struct net_device *netdev)
1973
+{
1974
+ return VRSS_SEND_TAB_SIZE * sizeof(u32);
1975
+}
1976
+
1977
+static void netvsc_get_regs(struct net_device *netdev,
1978
+ struct ethtool_regs *regs, void *p)
1979
+{
1980
+ struct net_device_context *ndc = netdev_priv(netdev);
1981
+ u32 *regs_buff = p;
1982
+
1983
+ /* increase the version, if buffer format is changed. */
1984
+ regs->version = 1;
1985
+
1986
+ memcpy(regs_buff, ndc->tx_table, VRSS_SEND_TAB_SIZE * sizeof(u32));
18171987 }
18181988
18191989 static u32 netvsc_get_msglevel(struct net_device *ndev)
....@@ -1832,6 +2002,8 @@
18322002
18332003 static const struct ethtool_ops ethtool_ops = {
18342004 .get_drvinfo = netvsc_get_drvinfo,
2005
+ .get_regs_len = netvsc_get_regs_len,
2006
+ .get_regs = netvsc_get_regs,
18352007 .get_msglevel = netvsc_get_msglevel,
18362008 .set_msglevel = netvsc_set_msglevel,
18372009 .get_link = ethtool_op_get_link,
....@@ -1859,14 +2031,14 @@
18592031 .ndo_start_xmit = netvsc_start_xmit,
18602032 .ndo_change_rx_flags = netvsc_change_rx_flags,
18612033 .ndo_set_rx_mode = netvsc_set_rx_mode,
2034
+ .ndo_fix_features = netvsc_fix_features,
2035
+ .ndo_set_features = netvsc_set_features,
18622036 .ndo_change_mtu = netvsc_change_mtu,
18632037 .ndo_validate_addr = eth_validate_addr,
18642038 .ndo_set_mac_address = netvsc_set_mac_addr,
18652039 .ndo_select_queue = netvsc_select_queue,
18662040 .ndo_get_stats64 = netvsc_get_stats64,
1867
-#ifdef CONFIG_NET_POLL_CONTROLLER
1868
- .ndo_poll_controller = netvsc_poll_controller,
1869
-#endif
2041
+ .ndo_bpf = netvsc_bpf,
18702042 };
18712043
18722044 /*
....@@ -2073,7 +2245,7 @@
20732245 "unable to change mtu to %u\n", ndev->mtu);
20742246
20752247 /* set multicast etc flags on VF */
2076
- dev_change_flags(vf_netdev, ndev->flags | IFF_SLAVE);
2248
+ dev_change_flags(vf_netdev, ndev->flags | IFF_SLAVE, NULL);
20772249
20782250 /* sync address list from ndev to VF */
20792251 netif_addr_lock_bh(ndev);
....@@ -2082,7 +2254,7 @@
20822254 netif_addr_unlock_bh(ndev);
20832255
20842256 if (netif_running(ndev)) {
2085
- ret = dev_open(vf_netdev);
2257
+ ret = dev_open(vf_netdev, NULL);
20862258 if (ret)
20872259 netdev_warn(vf_netdev,
20882260 "unable to open: %d\n", ret);
....@@ -2118,6 +2290,7 @@
21182290 {
21192291 struct device *parent = vf_netdev->dev.parent;
21202292 struct net_device_context *ndev_ctx;
2293
+ struct net_device *ndev;
21212294 struct pci_dev *pdev;
21222295 u32 serial;
21232296
....@@ -2144,6 +2317,18 @@
21442317 return hv_get_drvdata(ndev_ctx->device_ctx);
21452318 }
21462319
2320
+ /* Fallback path to check synthetic vf with
2321
+ * help of mac addr
2322
+ */
2323
+ list_for_each_entry(ndev_ctx, &netvsc_dev_list, list) {
2324
+ ndev = hv_get_drvdata(ndev_ctx->device_ctx);
2325
+ if (ether_addr_equal(vf_netdev->perm_addr, ndev->perm_addr)) {
2326
+ netdev_notice(vf_netdev,
2327
+ "falling back to mac addr based matching\n");
2328
+ return ndev;
2329
+ }
2330
+ }
2331
+
21472332 netdev_notice(vf_netdev,
21482333 "no netdev found for vf serial:%u\n", serial);
21492334 return NULL;
....@@ -2153,6 +2338,7 @@
21532338 {
21542339 struct net_device_context *net_device_ctx;
21552340 struct netvsc_device *netvsc_dev;
2341
+ struct bpf_prog *prog;
21562342 struct net_device *ndev;
21572343 int ret;
21582344
....@@ -2168,7 +2354,7 @@
21682354 if (!netvsc_dev || rtnl_dereference(net_device_ctx->vf_netdev))
21692355 return NOTIFY_DONE;
21702356
2171
- /* if syntihetic interface is a different namespace,
2357
+ /* if synthetic interface is a different namespace,
21722358 * then move the VF to that namespace; join will be
21732359 * done again in that context.
21742360 */
....@@ -2193,10 +2379,26 @@
21932379
21942380 dev_hold(vf_netdev);
21952381 rcu_assign_pointer(net_device_ctx->vf_netdev, vf_netdev);
2382
+
2383
+ vf_netdev->wanted_features = ndev->features;
2384
+ netdev_update_features(vf_netdev);
2385
+
2386
+ prog = netvsc_xdp_get(netvsc_dev);
2387
+ netvsc_vf_setxdp(vf_netdev, prog);
2388
+
21962389 return NOTIFY_OK;
21972390 }
21982391
2199
-/* VF up/down change detected, schedule to change data path */
2392
+/* Change the data path when VF UP/DOWN/CHANGE are detected.
2393
+ *
2394
+ * Typically a UP or DOWN event is followed by a CHANGE event, so
2395
+ * net_device_ctx->data_path_is_vf is used to cache the current data path
2396
+ * to avoid the duplicate call of netvsc_switch_datapath() and the duplicate
2397
+ * message.
2398
+ *
2399
+ * During hibernation, if a VF NIC driver (e.g. mlx5) preserves the network
2400
+ * interface, there is only the CHANGE event and no UP or DOWN event.
2401
+ */
22002402 static int netvsc_vf_changed(struct net_device *vf_netdev)
22012403 {
22022404 struct net_device_context *net_device_ctx;
....@@ -2212,6 +2414,15 @@
22122414 netvsc_dev = rtnl_dereference(net_device_ctx->nvdev);
22132415 if (!netvsc_dev)
22142416 return NOTIFY_DONE;
2417
+
2418
+ if (net_device_ctx->data_path_is_vf == vf_is_up)
2419
+ return NOTIFY_OK;
2420
+ net_device_ctx->data_path_is_vf = vf_is_up;
2421
+
2422
+ if (vf_is_up && !net_device_ctx->vf_alloc) {
2423
+ netdev_info(ndev, "Waiting for the VF association from host\n");
2424
+ wait_for_completion(&net_device_ctx->vf_add);
2425
+ }
22152426
22162427 netvsc_switch_datapath(ndev, vf_is_up);
22172428 netdev_info(ndev, "Data path switched %s VF: %s\n",
....@@ -2234,6 +2445,9 @@
22342445
22352446 netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
22362447
2448
+ netvsc_vf_setxdp(vf_netdev, NULL);
2449
+
2450
+ reinit_completion(&net_device_ctx->vf_add);
22372451 netdev_rx_handler_unregister(vf_netdev);
22382452 netdev_upper_dev_unlink(vf_netdev, ndev);
22392453 RCU_INIT_POINTER(net_device_ctx->vf_netdev, NULL);
....@@ -2271,6 +2485,7 @@
22712485
22722486 INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change);
22732487
2488
+ init_completion(&net_device_ctx->vf_add);
22742489 spin_lock_init(&net_device_ctx->lock);
22752490 INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
22762491 INIT_DELAYED_WORK(&net_device_ctx->vf_takeover, netvsc_vf_setup);
....@@ -2316,7 +2531,7 @@
23162531 * netvsc_probe() can't get rtnl lock and as a result vmbus_onoffer()
23172532 * -> ... -> device_add() -> ... -> __device_attach() can't get
23182533 * the device lock, so all the subchannels can't be processed --
2319
- * finally netvsc_subchan_work() hangs for ever.
2534
+ * finally netvsc_subchan_work() hangs forever.
23202535 */
23212536 rtnl_lock();
23222537
....@@ -2325,8 +2540,8 @@
23252540
23262541 /* hw_features computed in rndis_netdev_set_hwcaps() */
23272542 net->features = net->hw_features |
2328
- NETIF_F_HIGHDMA | NETIF_F_SG |
2329
- NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
2543
+ NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_CTAG_TX |
2544
+ NETIF_F_HW_VLAN_CTAG_RX;
23302545 net->vlan_features = net->features;
23312546
23322547 netdev_lockdep_set_classes(net);
....@@ -2349,14 +2564,14 @@
23492564 list_add(&net_device_ctx->list, &netvsc_dev_list);
23502565 rtnl_unlock();
23512566
2352
- kfree(device_info);
2567
+ netvsc_devinfo_put(device_info);
23532568 return 0;
23542569
23552570 register_failed:
23562571 rtnl_unlock();
23572572 rndis_filter_device_remove(dev, nvdev);
23582573 rndis_failed:
2359
- kfree(device_info);
2574
+ netvsc_devinfo_put(device_info);
23602575 devinfo_failed:
23612576 free_percpu(net_device_ctx->vf_stats);
23622577 no_stats:
....@@ -2384,8 +2599,10 @@
23842599
23852600 rtnl_lock();
23862601 nvdev = rtnl_dereference(ndev_ctx->nvdev);
2387
- if (nvdev)
2602
+ if (nvdev) {
23882603 cancel_work_sync(&nvdev->subchan_work);
2604
+ netvsc_xdp_set(net, NULL, NULL, nvdev);
2605
+ }
23892606
23902607 /*
23912608 * Call to the vsc driver to let it know that the device is being
....@@ -2410,6 +2627,66 @@
24102627 return 0;
24112628 }
24122629
2630
+static int netvsc_suspend(struct hv_device *dev)
2631
+{
2632
+ struct net_device_context *ndev_ctx;
2633
+ struct netvsc_device *nvdev;
2634
+ struct net_device *net;
2635
+ int ret;
2636
+
2637
+ net = hv_get_drvdata(dev);
2638
+
2639
+ ndev_ctx = netdev_priv(net);
2640
+ cancel_delayed_work_sync(&ndev_ctx->dwork);
2641
+
2642
+ rtnl_lock();
2643
+
2644
+ nvdev = rtnl_dereference(ndev_ctx->nvdev);
2645
+ if (nvdev == NULL) {
2646
+ ret = -ENODEV;
2647
+ goto out;
2648
+ }
2649
+
2650
+ /* Save the current config info */
2651
+ ndev_ctx->saved_netvsc_dev_info = netvsc_devinfo_get(nvdev);
2652
+ if (!ndev_ctx->saved_netvsc_dev_info) {
2653
+ ret = -ENOMEM;
2654
+ goto out;
2655
+ }
2656
+ ret = netvsc_detach(net, nvdev);
2657
+out:
2658
+ rtnl_unlock();
2659
+
2660
+ return ret;
2661
+}
2662
+
2663
+static int netvsc_resume(struct hv_device *dev)
2664
+{
2665
+ struct net_device *net = hv_get_drvdata(dev);
2666
+ struct net_device_context *net_device_ctx;
2667
+ struct netvsc_device_info *device_info;
2668
+ int ret;
2669
+
2670
+ rtnl_lock();
2671
+
2672
+ net_device_ctx = netdev_priv(net);
2673
+
2674
+ /* Reset the data path to the netvsc NIC before re-opening the vmbus
2675
+ * channel. Later netvsc_netdev_event() will switch the data path to
2676
+ * the VF upon the UP or CHANGE event.
2677
+ */
2678
+ net_device_ctx->data_path_is_vf = false;
2679
+ device_info = net_device_ctx->saved_netvsc_dev_info;
2680
+
2681
+ ret = netvsc_attach(net, device_info);
2682
+
2683
+ netvsc_devinfo_put(device_info);
2684
+ net_device_ctx->saved_netvsc_dev_info = NULL;
2685
+
2686
+ rtnl_unlock();
2687
+
2688
+ return ret;
2689
+}
24132690 static const struct hv_vmbus_device_id id_table[] = {
24142691 /* Network guid */
24152692 { HV_NIC_GUID, },
....@@ -2424,6 +2701,8 @@
24242701 .id_table = id_table,
24252702 .probe = netvsc_probe,
24262703 .remove = netvsc_remove,
2704
+ .suspend = netvsc_suspend,
2705
+ .resume = netvsc_resume,
24272706 .driver = {
24282707 .probe_type = PROBE_FORCE_SYNCHRONOUS,
24292708 },
....@@ -2464,6 +2743,7 @@
24642743 return netvsc_unregister_vf(event_dev);
24652744 case NETDEV_UP:
24662745 case NETDEV_DOWN:
2746
+ case NETDEV_CHANGE:
24672747 return netvsc_vf_changed(event_dev);
24682748 default:
24692749 return NOTIFY_DONE;