2024-05-14 bedbef8ad3e75a304af6361af235302bcc61d06b
kernel/drivers/net/virtio_net.c
@@ -1,19 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* A network driver using virtio.
  *
  * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 //#define DEBUG
 #include <linux/netdevice.h>
@@ -31,7 +19,6 @@
 #include <linux/average.h>
 #include <linux/filter.h>
 #include <linux/kernel.h>
-#include <linux/pci.h>
 #include <net/route.h>
 #include <net/xdp.h>
 #include <net/net_failover.h>
@@ -39,7 +26,7 @@
 static int napi_weight = NAPI_POLL_WEIGHT;
 module_param(napi_weight, int, 0444);

-static bool csum = true, gso = true, napi_tx;
+static bool csum = true, gso = true, napi_tx = true;
 module_param(csum, bool, 0444);
 module_param(gso, bool, 0444);
 module_param(napi_tx, bool, 0644);
@@ -75,6 +62,11 @@
 	VIRTIO_NET_F_GUEST_UFO,
 	VIRTIO_NET_F_GUEST_CSUM
 };
+
+#define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
+				   (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
+				   (1ULL << VIRTIO_NET_F_GUEST_ECN)  | \
+				   (1ULL << VIRTIO_NET_F_GUEST_UFO))

 struct virtnet_stat_desc {
 	char desc[ETH_GSTRING_LEN];
@@ -203,6 +195,9 @@
 	/* # of XDP queue pairs currently used by the driver */
 	u16 xdp_queue_pairs;

+	/* xdp_queue_pairs may be 0, when xdp is already loaded. So add this. */
+	bool xdp_enabled;
+
 	/* I like... big packets and I cannot lie! */
 	bool big_packets;

@@ -218,8 +213,14 @@
 	/* Packet virtio header size */
 	u8 hdr_len;

-	/* Work struct for refilling if we run low on memory. */
+	/* Work struct for delayed refilling if we run low on memory. */
 	struct delayed_work refill;
+
+	/* Is delayed refill enabled? */
+	bool refill_enabled;
+
+	/* The lock to synchronize the access to refill_enabled */
+	spinlock_t refill_lock;

 	/* Work struct for config space updates */
 	struct work_struct config_work;
@@ -238,6 +239,7 @@
 	u32 speed;

 	unsigned long guest_offloads;
+	unsigned long guest_offloads_capable;

 	/* failover when STANDBY feature enabled */
 	struct failover *failover;
@@ -321,6 +323,20 @@
 	} else
 		p = alloc_page(gfp_mask);
 	return p;
+}
+
+static void enable_delayed_refill(struct virtnet_info *vi)
+{
+	spin_lock_bh(&vi->refill_lock);
+	vi->refill_enabled = true;
+	spin_unlock_bh(&vi->refill_lock);
+}
+
+static void disable_delayed_refill(struct virtnet_info *vi)
+{
+	spin_lock_bh(&vi->refill_lock);
+	vi->refill_enabled = false;
+	spin_unlock_bh(&vi->refill_lock);
 }

 static void virtqueue_napi_schedule(struct napi_struct *napi,
@@ -492,12 +508,41 @@
 	return 0;
 }

-static struct send_queue *virtnet_xdp_sq(struct virtnet_info *vi)
-{
-	unsigned int qp;
+/* when vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on
+ * the current cpu, so it does not need to be locked.
+ *
+ * Here we use a macro instead of inline functions because we have to deal with
+ * three issues at the same time: 1. the choice of sq. 2. judge and execute the
+ * lock/unlock of txq 3. make sparse happy. It is difficult for two inline
+ * functions to perfectly solve these three problems at the same time.
+ */
+#define virtnet_xdp_get_sq(vi) ({ \
+	struct netdev_queue *txq; \
+	typeof(vi) v = (vi); \
+	unsigned int qp; \
+ \
+	if (v->curr_queue_pairs > nr_cpu_ids) { \
+		qp = v->curr_queue_pairs - v->xdp_queue_pairs; \
+		qp += smp_processor_id(); \
+		txq = netdev_get_tx_queue(v->dev, qp); \
+		__netif_tx_acquire(txq); \
+	} else { \
+		qp = smp_processor_id() % v->curr_queue_pairs; \
+		txq = netdev_get_tx_queue(v->dev, qp); \
+		__netif_tx_lock(txq, raw_smp_processor_id()); \
+	} \
+	v->sq + qp; \
+})

-	qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
-	return &vi->sq[qp];
+#define virtnet_xdp_put_sq(vi, q) { \
+	struct netdev_queue *txq; \
+	typeof(vi) v = (vi); \
+ \
+	txq = netdev_get_tx_queue(v->dev, (q) - v->sq); \
+	if (v->curr_queue_pairs > nr_cpu_ids) \
+		__netif_tx_release(txq); \
+	else \
+		__netif_tx_unlock(txq); \
 }

 static int virtnet_xdp_xmit(struct net_device *dev,
@@ -519,11 +564,11 @@
 	/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
 	 * indicate XDP resources have been successfully allocated.
 	 */
-	xdp_prog = rcu_dereference(rq->xdp_prog);
+	xdp_prog = rcu_access_pointer(rq->xdp_prog);
 	if (!xdp_prog)
 		return -ENXIO;

-	sq = virtnet_xdp_sq(vi);
+	sq = virtnet_xdp_get_sq(vi);

 	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
 		ret = -EINVAL;
@@ -571,12 +616,13 @@
 	sq->stats.kicks += kicks;
 	u64_stats_update_end(&sq->stats.syncp);

+	virtnet_xdp_put_sq(vi, sq);
 	return ret;
 }

 static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
 {
-	return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0;
+	return vi->xdp_enabled ? VIRTIO_XDP_HEADROOM : 0;
 }

 /* We copy the packet for XDP in the following cases:
@@ -600,8 +646,13 @@
 				       int page_off,
 				       unsigned int *len)
 {
-	struct page *page = alloc_page(GFP_ATOMIC);
+	int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+	struct page *page;

+	if (page_off + *len + tailroom > PAGE_SIZE)
+		return NULL;
+
+	page = alloc_page(GFP_ATOMIC);
 	if (!page)
 		return NULL;

@@ -609,7 +660,6 @@
 	page_off += *len;

 	while (--*num_buf) {
-		int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 		unsigned int buflen;
 		void *buf;
 		int off;
@@ -667,6 +717,12 @@
 	len -= vi->hdr_len;
 	stats->bytes += len;

+	if (unlikely(len > GOOD_PACKET_LEN)) {
+		pr_debug("%s: rx error: len %u exceeds max size %d\n",
+			 dev->name, len, GOOD_PACKET_LEN);
+		dev->stats.rx_length_errors++;
+		goto err_len;
+	}
 	rcu_read_lock();
 	xdp_prog = rcu_dereference(rq->xdp_prog);
 	if (xdp_prog) {
@@ -705,6 +761,7 @@
 		xdp.data_end = xdp.data + len;
 		xdp.data_meta = xdp.data;
 		xdp.rxq = &rq->xdp_rxq;
+		xdp.frame_sz = buflen;
 		orig_data = xdp.data;
 		act = bpf_prog_run_xdp(xdp_prog, &xdp);
 		stats->xdp_packets++;
@@ -718,7 +775,7 @@
 			break;
 		case XDP_TX:
 			stats->xdp_tx++;
-			xdpf = convert_to_xdp_frame(&xdp);
+			xdpf = xdp_convert_buff_to_frame(&xdp);
 			if (unlikely(!xdpf))
 				goto err_xdp;
 			err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
@@ -739,7 +796,7 @@
 			goto xdp_xmit;
 		default:
 			bpf_warn_invalid_xdp_action(act);
-			/* fall through */
+			fallthrough;
 		case XDP_ABORTED:
 			trace_xdp_exception(vi->dev, xdp_prog, act);
 		case XDP_DROP:
@@ -755,10 +812,10 @@
 	}
 	skb_reserve(skb, headroom - delta);
 	skb_put(skb, len);
-	if (!delta) {
+	if (!xdp_prog) {
 		buf += header_offset;
 		memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len);
-	} /* keep zeroed vnet hdr since packet was changed by bpf */
+	} /* keep zeroed vnet hdr since XDP is loaded */

 	if (metasize)
 		skb_metadata_set(skb, metasize);
@@ -769,6 +826,7 @@
 err_xdp:
 	rcu_read_unlock();
 	stats->xdp_drops++;
+err_len:
 	stats->drops++;
 	put_page(page);
 xdp_xmit:
@@ -813,14 +871,21 @@
 	int offset = buf - page_address(page);
 	struct sk_buff *head_skb, *curr_skb;
 	struct bpf_prog *xdp_prog;
-	unsigned int truesize;
+	unsigned int truesize = mergeable_ctx_to_truesize(ctx);
 	unsigned int headroom = mergeable_ctx_to_headroom(ctx);
-	int err;
 	unsigned int metasize = 0;
+	unsigned int frame_sz;
+	int err;

 	head_skb = NULL;
 	stats->bytes += len - vi->hdr_len;

+	if (unlikely(len > truesize)) {
+		pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
+			 dev->name, len, (unsigned long)ctx);
+		dev->stats.rx_length_errors++;
+		goto err_skb;
+	}
 	rcu_read_lock();
 	xdp_prog = rcu_dereference(rq->xdp_prog);
 	if (xdp_prog) {
@@ -837,6 +902,11 @@
 		if (unlikely(hdr->hdr.gso_type))
 			goto err_xdp;

+		/* Buffers with headroom use PAGE_SIZE as alloc size,
+		 * see add_recvbuf_mergeable() + get_mergeable_buf_len()
+		 */
+		frame_sz = headroom ? PAGE_SIZE : truesize;
+
 		/* This happens when rx buffer size is underestimated
 		 * or headroom is not enough because of the buffer
 		 * was refilled before XDP is set. This should only
@@ -850,6 +920,8 @@
 						      page, offset,
 						      VIRTIO_XDP_HEADROOM,
 						      &len);
+			frame_sz = PAGE_SIZE;
+
 			if (!xdp_page)
 				goto err_xdp;
 			offset = VIRTIO_XDP_HEADROOM;
@@ -866,6 +938,7 @@
 		xdp.data_end = xdp.data + (len - vi->hdr_len);
 		xdp.data_meta = xdp.data;
 		xdp.rxq = &rq->xdp_rxq;
+		xdp.frame_sz = frame_sz - vi->hdr_len;

 		act = bpf_prog_run_xdp(xdp_prog, &xdp);
 		stats->xdp_packets++;
@@ -898,9 +971,12 @@
 			break;
 		case XDP_TX:
 			stats->xdp_tx++;
-			xdpf = convert_to_xdp_frame(&xdp);
-			if (unlikely(!xdpf))
+			xdpf = xdp_convert_buff_to_frame(&xdp);
+			if (unlikely(!xdpf)) {
+				if (unlikely(xdp_page != page))
+					put_page(xdp_page);
 				goto err_xdp;
+			}
 			err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
 			if (unlikely(err < 0)) {
 				trace_xdp_exception(vi->dev, xdp_prog, act);
@@ -928,10 +1004,10 @@
 			goto xdp_xmit;
 		default:
 			bpf_warn_invalid_xdp_action(act);
-			/* fall through */
+			fallthrough;
 		case XDP_ABORTED:
 			trace_xdp_exception(vi->dev, xdp_prog, act);
-			/* fall through */
+			fallthrough;
 		case XDP_DROP:
 			if (unlikely(xdp_page != page))
 				__free_pages(xdp_page, 0);
@@ -939,14 +1015,6 @@
 		}
 	}
 	rcu_read_unlock();
-
-	truesize = mergeable_ctx_to_truesize(ctx);
-	if (unlikely(len > truesize)) {
-		pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
-			 dev->name, len, (unsigned long)ctx);
-		dev->stats.rx_length_errors++;
-		goto err_skb;
-	}

 	head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog,
 			       metasize);
@@ -1081,6 +1149,7 @@
 		goto frame_err;
 	}

+	skb_record_rx_queue(skb, vq2rxq(rq->vq));
 	skb->protocol = eth_type_trans(skb, dev);
 	pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
 		 ntohs(skb->protocol), skb->len, skb->pkt_type);
@@ -1360,9 +1429,13 @@
 		}
 	}

-	if (rq->vq->num_free > virtqueue_get_vring_size(rq->vq) / 2) {
-		if (!try_fill_recv(vi, rq, GFP_ATOMIC))
-			schedule_delayed_work(&vi->refill, 0);
+	if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) {
+		if (!try_fill_recv(vi, rq, GFP_ATOMIC)) {
+			spin_lock(&vi->refill_lock);
+			if (vi->refill_enabled)
+				schedule_delayed_work(&vi->refill, 0);
+			spin_unlock(&vi->refill_lock);
+		}
 	}

 	u64_stats_update_begin(&rq->stats.syncp);
@@ -1456,20 +1529,21 @@

 	received = virtnet_receive(rq, budget, &xdp_xmit);

+	if (xdp_xmit & VIRTIO_XDP_REDIR)
+		xdp_do_flush();
+
 	/* Out of packets? */
 	if (received < budget)
 		virtqueue_napi_complete(napi, rq->vq, received);

-	if (xdp_xmit & VIRTIO_XDP_REDIR)
-		xdp_do_flush_map();
-
 	if (xdp_xmit & VIRTIO_XDP_TX) {
-		sq = virtnet_xdp_sq(vi);
+		sq = virtnet_xdp_get_sq(vi);
 		if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
 			u64_stats_update_begin(&sq->stats.syncp);
 			sq->stats.kicks++;
 			u64_stats_update_end(&sq->stats.syncp);
 		}
+		virtnet_xdp_put_sq(vi, sq);
 	}

 	return received;
@@ -1479,6 +1553,8 @@
 {
 	struct virtnet_info *vi = netdev_priv(dev);
 	int i, err;
+
+	enable_delayed_refill(vi);

 	for (i = 0; i < vi->max_queue_pairs; i++) {
 		if (i < vi->curr_queue_pairs)
@@ -1604,7 +1680,7 @@
 	struct send_queue *sq = &vi->sq[qnum];
 	int err;
 	struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
-	bool kick = !skb->xmit_more;
+	bool kick = !netdev_xmit_more();
 	bool use_napi = sq->napi.weight;

 	/* Free up any pending old buffers before queueing new ones. */
@@ -1624,7 +1700,8 @@
 		dev->stats.tx_fifo_errors++;
 		if (net_ratelimit())
 			dev_warn(&dev->dev,
-				 "Unexpected TXQ (%d) queue failure: %d\n", qnum, err);
+				 "Unexpected TXQ (%d) queue failure: %d\n",
+				 qnum, err);
 		dev->stats.tx_dropped++;
 		dev_kfree_skb_any(skb);
 		return NETDEV_TX_OK;
@@ -1633,7 +1710,7 @@
 	/* Don't wait up for transmitted skbs to be freed. */
 	if (!use_napi) {
 		skb_orphan(skb);
-		nf_reset(skb);
+		nf_reset_ct(skb);
 	}

 	/* If running out of space, stop queue to avoid getting packets that we
@@ -1849,12 +1926,14 @@
 	struct virtnet_info *vi = netdev_priv(dev);
 	int i;

+	/* Make sure NAPI doesn't schedule refill work */
+	disable_delayed_refill(vi);
 	/* Make sure refill_work doesn't re-enable napi! */
 	cancel_delayed_work_sync(&vi->refill);

 	for (i = 0; i < vi->max_queue_pairs; i++) {
-		xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq);
 		napi_disable(&vi->rq[i].napi);
+		xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq);
 		virtnet_napi_tx_disable(&vi->sq[i].napi);
 	}

@@ -1961,7 +2040,7 @@
 	return 0;
 }

-static void virtnet_clean_affinity(struct virtnet_info *vi, long hcpu)
+static void virtnet_clean_affinity(struct virtnet_info *vi)
 {
 	int i;

@@ -1985,7 +2064,7 @@
 	int stride;

 	if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
-		virtnet_clean_affinity(vi, -1);
+		virtnet_clean_affinity(vi);
 		return;
 	}

@@ -2035,7 +2114,7 @@
 	struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
 						   node);

-	virtnet_clean_affinity(vi, cpu);
+	virtnet_clean_affinity(vi);
 	return 0;
 }

@@ -2216,48 +2295,13 @@
 	channels->other_count = 0;
 }

-/* Check if the user is trying to change anything besides speed/duplex */
-static bool
-virtnet_validate_ethtool_cmd(const struct ethtool_link_ksettings *cmd)
-{
-	struct ethtool_link_ksettings diff1 = *cmd;
-	struct ethtool_link_ksettings diff2 = {};
-
-	/* cmd is always set so we need to clear it, validate the port type
-	 * and also without autonegotiation we can ignore advertising
-	 */
-	diff1.base.speed = 0;
-	diff2.base.port = PORT_OTHER;
-	ethtool_link_ksettings_zero_link_mode(&diff1, advertising);
-	diff1.base.duplex = 0;
-	diff1.base.cmd = 0;
-	diff1.base.link_mode_masks_nwords = 0;
-
-	return !memcmp(&diff1.base, &diff2.base, sizeof(diff1.base)) &&
-		bitmap_empty(diff1.link_modes.supported,
-			     __ETHTOOL_LINK_MODE_MASK_NBITS) &&
-		bitmap_empty(diff1.link_modes.advertising,
-			     __ETHTOOL_LINK_MODE_MASK_NBITS) &&
-		bitmap_empty(diff1.link_modes.lp_advertising,
-			     __ETHTOOL_LINK_MODE_MASK_NBITS);
-}
-
 static int virtnet_set_link_ksettings(struct net_device *dev,
 				      const struct ethtool_link_ksettings *cmd)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
-	u32 speed;

-	speed = cmd->base.speed;
-	/* don't allow custom speed and duplex */
-	if (!ethtool_validate_speed(speed) ||
-	    !ethtool_validate_duplex(cmd->base.duplex) ||
-	    !virtnet_validate_ethtool_cmd(cmd))
-		return -EINVAL;
-	vi->speed = speed;
-	vi->duplex = cmd->base.duplex;
-
-	return 0;
+	return ethtool_virtdev_set_link_ksettings(dev, cmd,
+						  &vi->speed, &vi->duplex);
 }

 static int virtnet_get_link_ksettings(struct net_device *dev,
@@ -2268,6 +2312,44 @@
 	cmd->base.speed = vi->speed;
 	cmd->base.duplex = vi->duplex;
 	cmd->base.port = PORT_OTHER;
+
+	return 0;
+}
+
+static int virtnet_set_coalesce(struct net_device *dev,
+				struct ethtool_coalesce *ec)
+{
+	struct virtnet_info *vi = netdev_priv(dev);
+	int i, napi_weight;
+
+	if (ec->tx_max_coalesced_frames > 1 ||
+	    ec->rx_max_coalesced_frames != 1)
+		return -EINVAL;
+
+	napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0;
+	if (napi_weight ^ vi->sq[0].napi.weight) {
+		if (dev->flags & IFF_UP)
+			return -EBUSY;
+		for (i = 0; i < vi->max_queue_pairs; i++)
+			vi->sq[i].napi.weight = napi_weight;
+	}
+
+	return 0;
+}
+
+static int virtnet_get_coalesce(struct net_device *dev,
+				struct ethtool_coalesce *ec)
+{
+	struct ethtool_coalesce ec_default = {
+		.cmd = ETHTOOL_GCOALESCE,
+		.rx_max_coalesced_frames = 1,
+	};
+	struct virtnet_info *vi = netdev_priv(dev);
+
+	memcpy(ec, &ec_default, sizeof(ec_default));
+
+	if (vi->sq[0].napi.weight)
+		ec->tx_max_coalesced_frames = 1;

 	return 0;
 }
@@ -2288,17 +2370,19 @@
 	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX))
 		return;

-	speed = virtio_cread32(vi->vdev, offsetof(struct virtio_net_config,
-						  speed));
+	virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed);
+
 	if (ethtool_validate_speed(speed))
 		vi->speed = speed;
-	duplex = virtio_cread8(vi->vdev, offsetof(struct virtio_net_config,
-						  duplex));
+
+	virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, &duplex);
+
 	if (ethtool_validate_duplex(duplex))
 		vi->duplex = duplex;
 }

 static const struct ethtool_ops virtnet_ethtool_ops = {
+	.supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES,
 	.get_drvinfo = virtnet_get_drvinfo,
 	.get_link = ethtool_op_get_link,
 	.get_ringparam = virtnet_get_ringparam,
@@ -2310,12 +2394,13 @@
 	.get_ts_info = ethtool_op_get_ts_info,
 	.get_link_ksettings = virtnet_get_link_ksettings,
 	.set_link_ksettings = virtnet_set_link_ksettings,
+	.set_coalesce = virtnet_set_coalesce,
+	.get_coalesce = virtnet_get_coalesce,
 };

 static void virtnet_freeze_down(struct virtio_device *vdev)
 {
 	struct virtnet_info *vi = vdev->priv;
-	int i;

 	/* Make sure no work handler is accessing the device */
 	flush_work(&vi->config_work);
@@ -2323,14 +2408,8 @@
 	netif_tx_lock_bh(vi->dev);
 	netif_device_detach(vi->dev);
 	netif_tx_unlock_bh(vi->dev);
-	cancel_delayed_work_sync(&vi->refill);
-
-	if (netif_running(vi->dev)) {
-		for (i = 0; i < vi->max_queue_pairs; i++) {
-			napi_disable(&vi->rq[i].napi);
-			virtnet_napi_tx_disable(&vi->sq[i].napi);
-		}
-	}
+	if (netif_running(vi->dev))
+		virtnet_close(vi->dev);
 }

 static int init_vqs(struct virtnet_info *vi);
@@ -2338,7 +2417,7 @@
 static int virtnet_restore_up(struct virtio_device *vdev)
 {
 	struct virtnet_info *vi = vdev->priv;
-	int err, i;
+	int err;

 	err = init_vqs(vi);
 	if (err)
@@ -2346,16 +2425,12 @@

 	virtio_device_ready(vdev);

-	if (netif_running(vi->dev)) {
-		for (i = 0; i < vi->curr_queue_pairs; i++)
-			if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
-				schedule_delayed_work(&vi->refill, 0);
+	enable_delayed_refill(vi);

-		for (i = 0; i < vi->max_queue_pairs; i++) {
-			virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
-			virtnet_napi_tx_enable(vi, vi->sq[i].vq,
-					       &vi->sq[i].napi);
-		}
+	if (netif_running(vi->dev)) {
+		err = virtnet_open(vi->dev);
+		if (err)
+			return err;
 	}

 	netif_tx_lock_bh(vi->dev);
@@ -2373,7 +2448,7 @@

 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS,
 				  VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) {
-		dev_warn(&vi->dev->dev, "Fail to set guest offload. \n");
+		dev_warn(&vi->dev->dev, "Fail to set guest offload.\n");
 		return -EINVAL;
 	}

@@ -2415,7 +2490,7 @@
 	    virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
 	    virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
 	    virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))) {
-		NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO/CSUM, disable LRO/CSUM first");
+		NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first");
 		return -EOPNOTSUPP;
 	}

@@ -2436,21 +2511,17 @@

 	/* XDP requires extra queues for XDP_TX */
 	if (curr_qp + xdp_qp > vi->max_queue_pairs) {
-		NL_SET_ERR_MSG_MOD(extack, "Too few free TX rings available");
-		netdev_warn(dev, "request %i queues but max is %i\n",
+		netdev_warn(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n",
 			    curr_qp + xdp_qp, vi->max_queue_pairs);
-		return -ENOMEM;
+		xdp_qp = 0;
 	}

 	old_prog = rtnl_dereference(vi->rq[0].xdp_prog);
 	if (!prog && !old_prog)
 		return 0;

-	if (prog) {
-		prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);
-		if (IS_ERR(prog))
-			return PTR_ERR(prog);
-	}
+	if (prog)
+		bpf_prog_add(prog, vi->max_queue_pairs - 1);

 	/* Make sure NAPI is not using any XDP TX queues for RX. */
 	if (netif_running(dev)) {
@@ -2476,11 +2547,14 @@
 	vi->xdp_queue_pairs = xdp_qp;

 	if (prog) {
+		vi->xdp_enabled = true;
 		for (i = 0; i < vi->max_queue_pairs; i++) {
 			rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
 			if (i == 0 && !old_prog)
 				virtnet_clear_guest_offloads(vi);
 		}
+	} else {
+		vi->xdp_enabled = false;
 	}

 	for (i = 0; i < vi->max_queue_pairs; i++) {
@@ -2514,28 +2588,11 @@
 	return err;
 }

-static u32 virtnet_xdp_query(struct net_device *dev)
-{
-	struct virtnet_info *vi = netdev_priv(dev);
-	const struct bpf_prog *xdp_prog;
-	int i;
-
-	for (i = 0; i < vi->max_queue_pairs; i++) {
-		xdp_prog = rtnl_dereference(vi->rq[i].xdp_prog);
-		if (xdp_prog)
-			return xdp_prog->aux->id;
-	}
-	return 0;
-}
-
 static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 {
 	switch (xdp->command) {
 	case XDP_SETUP_PROG:
 		return virtnet_xdp_set(dev, xdp->prog, xdp->extack);
-	case XDP_QUERY_PROG:
-		xdp->prog_id = virtnet_xdp_query(dev);
-		return 0;
 	default:
 		return -EINVAL;
 	}
@@ -2557,6 +2614,35 @@
 	return 0;
 }

+static int virtnet_set_features(struct net_device *dev,
+				netdev_features_t features)
+{
+	struct virtnet_info *vi = netdev_priv(dev);
+	u64 offloads;
+	int err;
+
+	if (!vi->has_cvq)
+		return 0;
+
+	if ((dev->features ^ features) & NETIF_F_GRO_HW) {
+		if (vi->xdp_enabled)
+			return -EBUSY;
+
+		if (features & NETIF_F_GRO_HW)
+			offloads = vi->guest_offloads_capable;
+		else
+			offloads = vi->guest_offloads_capable &
+				   ~GUEST_OFFLOAD_GRO_HW_MASK;
+
+		err = virtnet_set_guest_offloads(vi, offloads);
+		if (err)
+			return err;
+		vi->guest_offloads = offloads;
+	}
+
+	return 0;
+}
+
 static const struct net_device_ops virtnet_netdev = {
 	.ndo_open = virtnet_open,
 	.ndo_stop = virtnet_close,
@@ -2571,6 +2657,7 @@
 	.ndo_xdp_xmit = virtnet_xdp_xmit,
 	.ndo_features_check = passthru_features_check,
 	.ndo_get_phys_port_name = virtnet_get_phys_port_name,
+	.ndo_set_features = virtnet_set_features,
 };

 static void virtnet_config_changed_work(struct work_struct *work)
@@ -2618,12 +2705,11 @@
 	int i;

 	for (i = 0; i < vi->max_queue_pairs; i++) {
-		napi_hash_del(&vi->rq[i].napi);
-		netif_napi_del(&vi->rq[i].napi);
-		netif_napi_del(&vi->sq[i].napi);
+		__netif_napi_del(&vi->rq[i].napi);
+		__netif_napi_del(&vi->sq[i].napi);
 	}

-	/* We called napi_hash_del() before netif_napi_del(),
+	/* We called __netif_napi_del(),
 	 * we need to respect an RCU grace period before freeing vi->rq
 	 */
 	synchronize_net();
@@ -2664,6 +2750,27 @@
 			put_page(vi->rq[i].alloc_frag.page);
 }

+static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf)
+{
+	if (!is_xdp_frame(buf))
+		dev_kfree_skb(buf);
+	else
+		xdp_return_frame(ptr_to_xdp(buf));
+}
+
+static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf)
+{
+	struct virtnet_info *vi = vq->vdev->priv;
+	int i = vq2rxq(vq);
+
+	if (vi->mergeable_rx_bufs)
+		put_page(virt_to_head_page(buf));
+	else if (vi->big_packets)
+		give_pages(&vi->rq[i], buf);
+	else
+		put_page(virt_to_head_page(buf));
+}
+
 static void free_unused_bufs(struct virtnet_info *vi)
 {
 	void *buf;
@@ -2671,26 +2778,16 @@

 	for (i = 0; i < vi->max_queue_pairs; i++) {
 		struct virtqueue *vq = vi->sq[i].vq;
-		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
-			if (!is_xdp_frame(buf))
-				dev_kfree_skb(buf);
-			else
-				xdp_return_frame(ptr_to_xdp(buf));
-		}
+		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
+			virtnet_sq_free_unused_buf(vq, buf);
+		cond_resched();
 	}

 	for (i = 0; i < vi->max_queue_pairs; i++) {
 		struct virtqueue *vq = vi->rq[i].vq;
-
-		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
-			if (vi->mergeable_rx_bufs) {
-				put_page(virt_to_head_page(buf));
-			} else if (vi->big_packets) {
-				give_pages(&vi->rq[i], buf);
-			} else {
-				put_page(virt_to_head_page(buf));
-			}
-		}
+		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
+			virtnet_rq_free_unused_buf(vq, buf);
+		cond_resched();
 	}
 }

@@ -2698,7 +2795,7 @@
 {
 	struct virtio_device *vdev = vi->vdev;

-	virtnet_clean_affinity(vi, -1);
+	virtnet_clean_affinity(vi);

 	vdev->config->del_vqs(vdev);

@@ -3019,6 +3116,11 @@
 	}
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM))
 		dev->features |= NETIF_F_RXCSUM;
+	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
+	    virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6))
+		dev->features |= NETIF_F_GRO_HW;
+	if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS))
+		dev->hw_features |= NETIF_F_GRO_HW;

 	dev->vlan_features = dev->features;

@@ -3041,6 +3143,7 @@
 	vdev->priv = vi;

 	INIT_WORK(&vi->config_work, virtnet_config_changed_work);
+	spin_lock_init(&vi->refill_lock);

 	/* If we can receive ANY GSO packets, we must allocate large ones. */
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
@@ -3073,8 +3176,10 @@
 		/* Should never trigger: MTU was previously validated
 		 * in virtnet_validate.
 		 */
-		dev_err(&vdev->dev, "device MTU appears to have changed "
-			"it is now %d < %d", mtu, dev->min_mtu);
+		dev_err(&vdev->dev,
+			"device MTU appears to have changed it is now %d < %d",
+			mtu, dev->min_mtu);
+		err = -EINVAL;
 		goto free;
 	}

@@ -3118,21 +3223,27 @@
 		}
 	}

-	err = register_netdev(dev);
+	/* serialize netdev register + virtio_device_ready() with ndo_open() */
+	rtnl_lock();
+
+	err = register_netdevice(dev);
 	if (err) {
 		pr_debug("virtio_net: registering device failed\n");
+		rtnl_unlock();
 		goto free_failover;
 	}

 	virtio_device_ready(vdev);
+
+	_virtnet_set_queues(vi, vi->curr_queue_pairs);
+
+	rtnl_unlock();

 	err = virtnet_cpu_notif_add(vi);
 	if (err) {
 		pr_debug("virtio_net: registering cpu notifier failed\n");
 		goto free_unregister_netdev;
 	}
-
-	virtnet_set_queues(vi, vi->curr_queue_pairs);

 	/* Assume link up if device can't report link status,
 	   otherwise get link status from config. */
@@ -3148,6 +3259,7 @@
 	for (i = 0; i < ARRAY_SIZE(guest_offloads); i++)
 		if (virtio_has_feature(vi->vdev, guest_offloads[i]))
 			set_bit(guest_offloads[i], &vi->guest_offloads);
+	vi->guest_offloads_capable = vi->guest_offloads;

 	pr_debug("virtnet: registered device %s with %d RX and TX vq's\n",
 		 dev->name, max_queue_pairs);