| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-or-later |
|---|
| 1 | 2 | /* A network driver using virtio. |
|---|
| 2 | 3 | * |
|---|
| 3 | 4 | * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation |
|---|
| 4 | | - * |
|---|
| 5 | | - * This program is free software; you can redistribute it and/or modify |
|---|
| 6 | | - * it under the terms of the GNU General Public License as published by |
|---|
| 7 | | - * the Free Software Foundation; either version 2 of the License, or |
|---|
| 8 | | - * (at your option) any later version. |
|---|
| 9 | | - * |
|---|
| 10 | | - * This program is distributed in the hope that it will be useful, |
|---|
| 11 | | - * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 12 | | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|---|
| 13 | | - * GNU General Public License for more details. |
|---|
| 14 | | - * |
|---|
| 15 | | - * You should have received a copy of the GNU General Public License |
|---|
| 16 | | - * along with this program; if not, see <http://www.gnu.org/licenses/>. |
|---|
| 17 | 5 | */ |
|---|
| 18 | 6 | //#define DEBUG |
|---|
| 19 | 7 | #include <linux/netdevice.h> |
|---|
| .. | .. |
|---|
| 31 | 19 | #include <linux/average.h> |
|---|
| 32 | 20 | #include <linux/filter.h> |
|---|
| 33 | 21 | #include <linux/kernel.h> |
|---|
| 34 | | -#include <linux/pci.h> |
|---|
| 35 | 22 | #include <net/route.h> |
|---|
| 36 | 23 | #include <net/xdp.h> |
|---|
| 37 | 24 | #include <net/net_failover.h> |
|---|
| .. | .. |
|---|
| 39 | 26 | static int napi_weight = NAPI_POLL_WEIGHT; |
|---|
| 40 | 27 | module_param(napi_weight, int, 0444); |
|---|
| 41 | 28 | |
|---|
| 42 | | -static bool csum = true, gso = true, napi_tx; |
|---|
| 29 | +static bool csum = true, gso = true, napi_tx = true; |
|---|
| 43 | 30 | module_param(csum, bool, 0444); |
|---|
| 44 | 31 | module_param(gso, bool, 0444); |
|---|
| 45 | 32 | module_param(napi_tx, bool, 0644); |
|---|
| .. | .. |
|---|
| 75 | 62 | VIRTIO_NET_F_GUEST_UFO, |
|---|
| 76 | 63 | VIRTIO_NET_F_GUEST_CSUM |
|---|
| 77 | 64 | }; |
|---|
| 65 | + |
|---|
| 66 | +#define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ |
|---|
| 67 | + (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ |
|---|
| 68 | + (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ |
|---|
| 69 | + (1ULL << VIRTIO_NET_F_GUEST_UFO)) |
|---|
| 78 | 70 | |
|---|
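The new GUEST_OFFLOAD_GRO_HW_MASK above collects the guest offload feature bits that back NETIF_F_GRO_HW, so a later hunk can strip them all in one step. A minimal standalone sketch of that derivation (plain userspace C; the feature-bit numbers are the virtio spec values, everything else is illustrative, not driver code):

```c
/* Standalone sketch (not kernel code): models how the driver derives the
 * guest-offload word when GRO_HW is toggled. Feature-bit values are the
 * ones from the virtio spec; the rest is illustrative. */
#include <stdio.h>
#include <stdint.h>

#define VIRTIO_NET_F_GUEST_CSUM 1
#define VIRTIO_NET_F_GUEST_TSO4 7
#define VIRTIO_NET_F_GUEST_TSO6 8
#define VIRTIO_NET_F_GUEST_ECN  9
#define VIRTIO_NET_F_GUEST_UFO  10

#define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
				   (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
				   (1ULL << VIRTIO_NET_F_GUEST_ECN)  | \
				   (1ULL << VIRTIO_NET_F_GUEST_UFO))

int main(void)
{
	/* pretend the device advertised every guest offload at probe time */
	uint64_t capable = GUEST_OFFLOAD_GRO_HW_MASK |
			   (1ULL << VIRTIO_NET_F_GUEST_CSUM);

	uint64_t gro_on  = capable;                              /* GRO_HW set */
	uint64_t gro_off = capable & ~GUEST_OFFLOAD_GRO_HW_MASK; /* GRO_HW cleared */

	printf("capable=%#llx gro_on=%#llx gro_off=%#llx\n",
	       (unsigned long long)capable,
	       (unsigned long long)gro_on,
	       (unsigned long long)gro_off);
	return 0;
}
```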
| 79 | 71 | struct virtnet_stat_desc { |
|---|
| 80 | 72 | char desc[ETH_GSTRING_LEN]; |
|---|
| .. | .. |
|---|
| 203 | 195 | /* # of XDP queue pairs currently used by the driver */ |
|---|
| 204 | 196 | u16 xdp_queue_pairs; |
|---|
| 205 | 197 | |
|---|
| 198 | + /* xdp_queue_pairs may be 0 when XDP is already loaded, so track the XDP state explicitly. */ |
|---|
| 199 | + bool xdp_enabled; |
|---|
| 200 | + |
|---|
| 206 | 201 | /* I like... big packets and I cannot lie! */ |
|---|
| 207 | 202 | bool big_packets; |
|---|
| 208 | 203 | |
|---|
| .. | .. |
|---|
| 218 | 213 | /* Packet virtio header size */ |
|---|
| 219 | 214 | u8 hdr_len; |
|---|
| 220 | 215 | |
|---|
| 221 | | - /* Work struct for refilling if we run low on memory. */ |
|---|
| 216 | + /* Work struct for delayed refilling if we run low on memory. */ |
|---|
| 222 | 217 | struct delayed_work refill; |
|---|
| 218 | + |
|---|
| 219 | + /* Is delayed refill enabled? */ |
|---|
| 220 | + bool refill_enabled; |
|---|
| 221 | + |
|---|
| 223 | + /* The lock to synchronize access to refill_enabled */ |
|---|
| 223 | + spinlock_t refill_lock; |
|---|
| 223 | 224 | |
|---|
| 224 | 225 | /* Work struct for config space updates */ |
|---|
| 225 | 226 | struct work_struct config_work; |
|---|
| .. | .. |
|---|
| 238 | 239 | u32 speed; |
|---|
| 239 | 240 | |
|---|
| 240 | 241 | unsigned long guest_offloads; |
|---|
| 242 | + unsigned long guest_offloads_capable; |
|---|
| 241 | 243 | |
|---|
| 242 | 244 | /* failover when STANDBY feature enabled */ |
|---|
| 243 | 245 | struct failover *failover; |
|---|
| .. | .. |
|---|
| 321 | 323 | } else |
|---|
| 322 | 324 | p = alloc_page(gfp_mask); |
|---|
| 323 | 325 | return p; |
|---|
| 326 | +} |
|---|
| 327 | + |
|---|
| 328 | +static void enable_delayed_refill(struct virtnet_info *vi) |
|---|
| 329 | +{ |
|---|
| 330 | + spin_lock_bh(&vi->refill_lock); |
|---|
| 331 | + vi->refill_enabled = true; |
|---|
| 332 | + spin_unlock_bh(&vi->refill_lock); |
|---|
| 333 | +} |
|---|
| 334 | + |
|---|
| 335 | +static void disable_delayed_refill(struct virtnet_info *vi) |
|---|
| 336 | +{ |
|---|
| 337 | + spin_lock_bh(&vi->refill_lock); |
|---|
| 338 | + vi->refill_enabled = false; |
|---|
| 339 | + spin_unlock_bh(&vi->refill_lock); |
|---|
| 324 | 340 | } |
|---|
| 325 | 341 | |
|---|
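The enable/disable helpers above implement a common shutdown pattern: a lock-protected flag that the receive path must check before scheduling deferred work, so virtnet_close() can clear the flag and then cancel_delayed_work_sync() with no risk of the work being re-queued behind its back. A standalone sketch of the pattern (pthreads stand in for the kernel spinlock and workqueue; all names are illustrative):

```c
/* Standalone sketch (pthreads, not kernel primitives): models the
 * refill_enabled pattern -- a flag, protected by a lock, checked by the
 * datapath before scheduling deferred refill work. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t refill_lock = PTHREAD_MUTEX_INITIALIZER;
static bool refill_enabled;

static void enable_delayed_refill(void)
{
	pthread_mutex_lock(&refill_lock);
	refill_enabled = true;
	pthread_mutex_unlock(&refill_lock);
}

static void disable_delayed_refill(void)
{
	pthread_mutex_lock(&refill_lock);
	refill_enabled = false;
	pthread_mutex_unlock(&refill_lock);
}

/* stand-in for schedule_delayed_work(&vi->refill, 0) in the rx path */
static void maybe_schedule_refill(void)
{
	pthread_mutex_lock(&refill_lock);
	if (refill_enabled)
		printf("refill work scheduled\n");
	else
		printf("refill suppressed (device closing)\n");
	pthread_mutex_unlock(&refill_lock);
}

int main(void)
{
	enable_delayed_refill();   /* like virtnet_open() */
	maybe_schedule_refill();   /* rx path running low on buffers */
	disable_delayed_refill();  /* like virtnet_close() */
	maybe_schedule_refill();   /* any late rx completion is a no-op */
	return 0;
}
```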
| 326 | 342 | static void virtqueue_napi_schedule(struct napi_struct *napi, |
|---|
| .. | .. |
|---|
| 492 | 508 | return 0; |
|---|
| 493 | 509 | } |
|---|
| 494 | 510 | |
|---|
| 495 | | -static struct send_queue *virtnet_xdp_sq(struct virtnet_info *vi) |
|---|
| 496 | | -{ |
|---|
| 497 | | - unsigned int qp; |
|---|
| 511 | +/* When vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for XDP tx on |
|---|
| 512 | + * the current cpu, so it does not need to be locked. |
|---|
| 513 | + * |
|---|
| 514 | + * Here we use a macro instead of inline functions because we have to deal with |
|---|
| 515 | + * three issues at the same time: 1. the choice of sq, 2. deciding whether to |
|---|
| 516 | + * lock/unlock the txq and doing so, 3. keeping sparse happy. It is difficult |
|---|
| 517 | + * for two inline functions to solve all three at once. |
|---|
| 518 | + */ |
|---|
| 519 | +#define virtnet_xdp_get_sq(vi) ({ \ |
|---|
| 520 | + struct netdev_queue *txq; \ |
|---|
| 521 | + typeof(vi) v = (vi); \ |
|---|
| 522 | + unsigned int qp; \ |
|---|
| 523 | + \ |
|---|
| 524 | + if (v->curr_queue_pairs > nr_cpu_ids) { \ |
|---|
| 525 | + qp = v->curr_queue_pairs - v->xdp_queue_pairs; \ |
|---|
| 526 | + qp += smp_processor_id(); \ |
|---|
| 527 | + txq = netdev_get_tx_queue(v->dev, qp); \ |
|---|
| 528 | + __netif_tx_acquire(txq); \ |
|---|
| 529 | + } else { \ |
|---|
| 530 | + qp = smp_processor_id() % v->curr_queue_pairs; \ |
|---|
| 531 | + txq = netdev_get_tx_queue(v->dev, qp); \ |
|---|
| 532 | + __netif_tx_lock(txq, raw_smp_processor_id()); \ |
|---|
| 533 | + } \ |
|---|
| 534 | + v->sq + qp; \ |
|---|
| 535 | +}) |
|---|
| 498 | 536 | |
|---|
| 499 | | - qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id(); |
|---|
| 500 | | - return &vi->sq[qp]; |
|---|
| 537 | +#define virtnet_xdp_put_sq(vi, q) { \ |
|---|
| 538 | + struct netdev_queue *txq; \ |
|---|
| 539 | + typeof(vi) v = (vi); \ |
|---|
| 540 | + \ |
|---|
| 541 | + txq = netdev_get_tx_queue(v->dev, (q) - v->sq); \ |
|---|
| 542 | + if (v->curr_queue_pairs > nr_cpu_ids) \ |
|---|
| 543 | + __netif_tx_release(txq); \ |
|---|
| 544 | + else \ |
|---|
| 545 | + __netif_tx_unlock(txq); \ |
|---|
| 501 | 546 | } |
|---|
| 502 | 547 | |
|---|
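The get/put macros above pick a send queue in two regimes: with more queue pairs than CPUs, each CPU gets a dedicated XDP queue and no lock; otherwise queues are shared modulo the CPU count and the txq lock is taken. A standalone sketch of just the queue-pick arithmetic (the real macros also acquire and release the txq lock, which this only annotates; the parameters are plain variables here):

```c
/* Standalone sketch: models the queue selection in virtnet_xdp_get_sq().
 * nr_cpu_ids, curr_queue_pairs and the cpu id are plain parameters. */
#include <stdbool.h>
#include <stdio.h>

struct pick { unsigned int qp; bool needs_lock; };

static struct pick xdp_pick_sq(unsigned int curr_queue_pairs,
			       unsigned int xdp_queue_pairs,
			       unsigned int nr_cpu_ids, unsigned int cpu)
{
	struct pick p;

	if (curr_queue_pairs > nr_cpu_ids) {
		/* dedicated XDP queues: one per cpu, no contention */
		p.qp = curr_queue_pairs - xdp_queue_pairs + cpu;
		p.needs_lock = false;
	} else {
		/* fewer queues than cpus: share them, so lock the txq */
		p.qp = cpu % curr_queue_pairs;
		p.needs_lock = true;
	}
	return p;
}

int main(void)
{
	struct pick a = xdp_pick_sq(8, 4, 4, 2);  /* plenty of queues */
	struct pick b = xdp_pick_sq(2, 0, 4, 3);  /* shared, locked mode */

	printf("dedicated: qp=%u lock=%d\n", a.qp, a.needs_lock);
	printf("shared:    qp=%u lock=%d\n", b.qp, b.needs_lock);
	return 0;
}
```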
| 503 | 548 | static int virtnet_xdp_xmit(struct net_device *dev, |
|---|
| .. | .. |
|---|
| 519 | 564 | /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this |
|---|
| 520 | 565 | * indicate XDP resources have been successfully allocated. |
|---|
| 521 | 566 | */ |
|---|
| 522 | | - xdp_prog = rcu_dereference(rq->xdp_prog); |
|---|
| 567 | + xdp_prog = rcu_access_pointer(rq->xdp_prog); |
|---|
| 523 | 568 | if (!xdp_prog) |
|---|
| 524 | 569 | return -ENXIO; |
|---|
| 525 | 570 | |
|---|
| 526 | | - sq = virtnet_xdp_sq(vi); |
|---|
| 571 | + sq = virtnet_xdp_get_sq(vi); |
|---|
| 527 | 572 | |
|---|
| 528 | 573 | if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { |
|---|
| 529 | 574 | ret = -EINVAL; |
|---|
| .. | .. |
|---|
| 571 | 616 | sq->stats.kicks += kicks; |
|---|
| 572 | 617 | u64_stats_update_end(&sq->stats.syncp); |
|---|
| 573 | 618 | |
|---|
| 619 | + virtnet_xdp_put_sq(vi, sq); |
|---|
| 574 | 620 | return ret; |
|---|
| 575 | 621 | } |
|---|
| 576 | 622 | |
|---|
| 577 | 623 | static unsigned int virtnet_get_headroom(struct virtnet_info *vi) |
|---|
| 578 | 624 | { |
|---|
| 579 | | - return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0; |
|---|
| 625 | + return vi->xdp_enabled ? VIRTIO_XDP_HEADROOM : 0; |
|---|
| 580 | 626 | } |
|---|
| 581 | 627 | |
|---|
| 582 | 628 | /* We copy the packet for XDP in the following cases: |
|---|
| .. | .. |
|---|
| 600 | 646 | int page_off, |
|---|
| 601 | 647 | unsigned int *len) |
|---|
| 602 | 648 | { |
|---|
| 603 | | - struct page *page = alloc_page(GFP_ATOMIC); |
|---|
| 649 | + int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); |
|---|
| 650 | + struct page *page; |
|---|
| 604 | 651 | |
|---|
| 652 | + if (page_off + *len + tailroom > PAGE_SIZE) |
|---|
| 653 | + return NULL; |
|---|
| 654 | + |
|---|
| 655 | + page = alloc_page(GFP_ATOMIC); |
|---|
| 605 | 656 | if (!page) |
|---|
| 606 | 657 | return NULL; |
|---|
| 607 | 658 | |
|---|
| .. | .. |
|---|
| 609 | 660 | page_off += *len; |
|---|
| 610 | 661 | |
|---|
| 611 | 662 | while (--*num_buf) { |
|---|
| 612 | | - int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); |
|---|
| 613 | 663 | unsigned int buflen; |
|---|
| 614 | 664 | void *buf; |
|---|
| 615 | 665 | int off; |
|---|
| .. | .. |
|---|
| 667 | 717 | len -= vi->hdr_len; |
|---|
| 668 | 718 | stats->bytes += len; |
|---|
| 669 | 719 | |
|---|
| 720 | + if (unlikely(len > GOOD_PACKET_LEN)) { |
|---|
| 721 | + pr_debug("%s: rx error: len %u exceeds max size %d\n", |
|---|
| 722 | + dev->name, len, GOOD_PACKET_LEN); |
|---|
| 723 | + dev->stats.rx_length_errors++; |
|---|
| 724 | + goto err_len; |
|---|
| 725 | + } |
|---|
| 670 | 726 | rcu_read_lock(); |
|---|
| 671 | 727 | xdp_prog = rcu_dereference(rq->xdp_prog); |
|---|
| 672 | 728 | if (xdp_prog) { |
|---|
| .. | .. |
|---|
| 705 | 761 | xdp.data_end = xdp.data + len; |
|---|
| 706 | 762 | xdp.data_meta = xdp.data; |
|---|
| 707 | 763 | xdp.rxq = &rq->xdp_rxq; |
|---|
| 764 | + xdp.frame_sz = buflen; |
|---|
| 708 | 765 | orig_data = xdp.data; |
|---|
| 709 | 766 | act = bpf_prog_run_xdp(xdp_prog, &xdp); |
|---|
| 710 | 767 | stats->xdp_packets++; |
|---|
| .. | .. |
|---|
| 718 | 775 | break; |
|---|
| 719 | 776 | case XDP_TX: |
|---|
| 720 | 777 | stats->xdp_tx++; |
|---|
| 721 | | - xdpf = convert_to_xdp_frame(&xdp); |
|---|
| 778 | + xdpf = xdp_convert_buff_to_frame(&xdp); |
|---|
| 722 | 779 | if (unlikely(!xdpf)) |
|---|
| 723 | 780 | goto err_xdp; |
|---|
| 724 | 781 | err = virtnet_xdp_xmit(dev, 1, &xdpf, 0); |
|---|
| .. | .. |
|---|
| 739 | 796 | goto xdp_xmit; |
|---|
| 740 | 797 | default: |
|---|
| 741 | 798 | bpf_warn_invalid_xdp_action(act); |
|---|
| 742 | | - /* fall through */ |
|---|
| 799 | + fallthrough; |
|---|
| 743 | 800 | case XDP_ABORTED: |
|---|
| 744 | 801 | trace_xdp_exception(vi->dev, xdp_prog, act); |
|---|
| 745 | 802 | case XDP_DROP: |
|---|
| .. | .. |
|---|
| 755 | 812 | } |
|---|
| 756 | 813 | skb_reserve(skb, headroom - delta); |
|---|
| 757 | 814 | skb_put(skb, len); |
|---|
| 758 | | - if (!delta) { |
|---|
| 815 | + if (!xdp_prog) { |
|---|
| 759 | 816 | buf += header_offset; |
|---|
| 760 | 817 | memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len); |
|---|
| 761 | | - } /* keep zeroed vnet hdr since packet was changed by bpf */ |
|---|
| 818 | + } /* keep zeroed vnet hdr since XDP is loaded */ |
|---|
| 762 | 819 | |
|---|
| 763 | 820 | if (metasize) |
|---|
| 764 | 821 | skb_metadata_set(skb, metasize); |
|---|
| .. | .. |
|---|
| 769 | 826 | err_xdp: |
|---|
| 770 | 827 | rcu_read_unlock(); |
|---|
| 771 | 828 | stats->xdp_drops++; |
|---|
| 829 | +err_len: |
|---|
| 772 | 830 | stats->drops++; |
|---|
| 773 | 831 | put_page(page); |
|---|
| 774 | 832 | xdp_xmit: |
|---|
| .. | .. |
|---|
| 813 | 871 | int offset = buf - page_address(page); |
|---|
| 814 | 872 | struct sk_buff *head_skb, *curr_skb; |
|---|
| 815 | 873 | struct bpf_prog *xdp_prog; |
|---|
| 816 | | - unsigned int truesize; |
|---|
| 874 | + unsigned int truesize = mergeable_ctx_to_truesize(ctx); |
|---|
| 817 | 875 | unsigned int headroom = mergeable_ctx_to_headroom(ctx); |
|---|
| 818 | | - int err; |
|---|
| 819 | 876 | unsigned int metasize = 0; |
|---|
| 877 | + unsigned int frame_sz; |
|---|
| 878 | + int err; |
|---|
| 820 | 879 | |
|---|
| 821 | 880 | head_skb = NULL; |
|---|
| 822 | 881 | stats->bytes += len - vi->hdr_len; |
|---|
| 823 | 882 | |
|---|
| 883 | + if (unlikely(len > truesize)) { |
|---|
| 884 | + pr_debug("%s: rx error: len %u exceeds truesize %lu\n", |
|---|
| 885 | + dev->name, len, (unsigned long)ctx); |
|---|
| 886 | + dev->stats.rx_length_errors++; |
|---|
| 887 | + goto err_skb; |
|---|
| 888 | + } |
|---|
| 824 | 889 | rcu_read_lock(); |
|---|
| 825 | 890 | xdp_prog = rcu_dereference(rq->xdp_prog); |
|---|
| 826 | 891 | if (xdp_prog) { |
|---|
| .. | .. |
|---|
| 837 | 902 | if (unlikely(hdr->hdr.gso_type)) |
|---|
| 838 | 903 | goto err_xdp; |
|---|
| 839 | 904 | |
|---|
| 905 | + /* Buffers with headroom use PAGE_SIZE as alloc size, |
|---|
| 906 | + * see add_recvbuf_mergeable() + get_mergeable_buf_len() |
|---|
| 907 | + */ |
|---|
| 908 | + frame_sz = headroom ? PAGE_SIZE : truesize; |
|---|
| 909 | + |
|---|
| 840 | 910 | /* This happens when rx buffer size is underestimated |
|---|
| 841 | 911 | * or headroom is not enough because of the buffer |
|---|
| 842 | 912 | * was refilled before XDP is set. This should only |
|---|
| .. | .. |
|---|
| 850 | 920 | page, offset, |
|---|
| 851 | 921 | VIRTIO_XDP_HEADROOM, |
|---|
| 852 | 922 | &len); |
|---|
| 923 | + frame_sz = PAGE_SIZE; |
|---|
| 924 | + |
|---|
| 853 | 925 | if (!xdp_page) |
|---|
| 854 | 926 | goto err_xdp; |
|---|
| 855 | 927 | offset = VIRTIO_XDP_HEADROOM; |
|---|
| .. | .. |
|---|
| 866 | 938 | xdp.data_end = xdp.data + (len - vi->hdr_len); |
|---|
| 867 | 939 | xdp.data_meta = xdp.data; |
|---|
| 868 | 940 | xdp.rxq = &rq->xdp_rxq; |
|---|
| 941 | + xdp.frame_sz = frame_sz - vi->hdr_len; |
|---|
| 869 | 942 | |
|---|
| 870 | 943 | act = bpf_prog_run_xdp(xdp_prog, &xdp); |
|---|
| 871 | 944 | stats->xdp_packets++; |
|---|
| .. | .. |
|---|
| 898 | 971 | break; |
|---|
| 899 | 972 | case XDP_TX: |
|---|
| 900 | 973 | stats->xdp_tx++; |
|---|
| 901 | | - xdpf = convert_to_xdp_frame(&xdp); |
|---|
| 902 | | - if (unlikely(!xdpf)) |
|---|
| 974 | + xdpf = xdp_convert_buff_to_frame(&xdp); |
|---|
| 975 | + if (unlikely(!xdpf)) { |
|---|
| 976 | + if (unlikely(xdp_page != page)) |
|---|
| 977 | + put_page(xdp_page); |
|---|
| 903 | 978 | goto err_xdp; |
|---|
| 979 | + } |
|---|
| 904 | 980 | err = virtnet_xdp_xmit(dev, 1, &xdpf, 0); |
|---|
| 905 | 981 | if (unlikely(err < 0)) { |
|---|
| 906 | 982 | trace_xdp_exception(vi->dev, xdp_prog, act); |
|---|
| .. | .. |
|---|
| 928 | 1004 | goto xdp_xmit; |
|---|
| 929 | 1005 | default: |
|---|
| 930 | 1006 | bpf_warn_invalid_xdp_action(act); |
|---|
| 931 | | - /* fall through */ |
|---|
| 1007 | + fallthrough; |
|---|
| 932 | 1008 | case XDP_ABORTED: |
|---|
| 933 | 1009 | trace_xdp_exception(vi->dev, xdp_prog, act); |
|---|
| 934 | | - /* fall through */ |
|---|
| 1010 | + fallthrough; |
|---|
| 935 | 1011 | case XDP_DROP: |
|---|
| 936 | 1012 | if (unlikely(xdp_page != page)) |
|---|
| 937 | 1013 | __free_pages(xdp_page, 0); |
|---|
| .. | .. |
|---|
| 939 | 1015 | } |
|---|
| 940 | 1016 | } |
|---|
| 941 | 1017 | rcu_read_unlock(); |
|---|
| 942 | | - |
|---|
| 943 | | - truesize = mergeable_ctx_to_truesize(ctx); |
|---|
| 944 | | - if (unlikely(len > truesize)) { |
|---|
| 945 | | - pr_debug("%s: rx error: len %u exceeds truesize %lu\n", |
|---|
| 946 | | - dev->name, len, (unsigned long)ctx); |
|---|
| 947 | | - dev->stats.rx_length_errors++; |
|---|
| 948 | | - goto err_skb; |
|---|
| 949 | | - } |
|---|
| 950 | 1018 | |
|---|
| 951 | 1019 | head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog, |
|---|
| 952 | 1020 | metasize); |
|---|
| .. | .. |
|---|
| 1081 | 1149 | goto frame_err; |
|---|
| 1082 | 1150 | } |
|---|
| 1083 | 1151 | |
|---|
| 1152 | + skb_record_rx_queue(skb, vq2rxq(rq->vq)); |
|---|
| 1084 | 1153 | skb->protocol = eth_type_trans(skb, dev); |
|---|
| 1085 | 1154 | pr_debug("Receiving skb proto 0x%04x len %i type %i\n", |
|---|
| 1086 | 1155 | ntohs(skb->protocol), skb->len, skb->pkt_type); |
|---|
| .. | .. |
|---|
| 1360 | 1429 | } |
|---|
| 1361 | 1430 | } |
|---|
| 1362 | 1431 | |
|---|
| 1363 | | - if (rq->vq->num_free > virtqueue_get_vring_size(rq->vq) / 2) { |
|---|
| 1364 | | - if (!try_fill_recv(vi, rq, GFP_ATOMIC)) |
|---|
| 1365 | | - schedule_delayed_work(&vi->refill, 0); |
|---|
| 1432 | + if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) { |
|---|
| 1433 | + if (!try_fill_recv(vi, rq, GFP_ATOMIC)) { |
|---|
| 1434 | + spin_lock(&vi->refill_lock); |
|---|
| 1435 | + if (vi->refill_enabled) |
|---|
| 1436 | + schedule_delayed_work(&vi->refill, 0); |
|---|
| 1437 | + spin_unlock(&vi->refill_lock); |
|---|
| 1438 | + } |
|---|
| 1366 | 1439 | } |
|---|
| 1367 | 1440 | |
|---|
| 1368 | 1441 | u64_stats_update_begin(&rq->stats.syncp); |
|---|
| .. | .. |
|---|
| 1456 | 1529 | |
|---|
| 1457 | 1530 | received = virtnet_receive(rq, budget, &xdp_xmit); |
|---|
| 1458 | 1531 | |
|---|
| 1532 | + if (xdp_xmit & VIRTIO_XDP_REDIR) |
|---|
| 1533 | + xdp_do_flush(); |
|---|
| 1534 | + |
|---|
| 1459 | 1535 | /* Out of packets? */ |
|---|
| 1460 | 1536 | if (received < budget) |
|---|
| 1461 | 1537 | virtqueue_napi_complete(napi, rq->vq, received); |
|---|
| 1462 | 1538 | |
|---|
| 1463 | | - if (xdp_xmit & VIRTIO_XDP_REDIR) |
|---|
| 1464 | | - xdp_do_flush_map(); |
|---|
| 1465 | | - |
|---|
| 1466 | 1539 | if (xdp_xmit & VIRTIO_XDP_TX) { |
|---|
| 1467 | | - sq = virtnet_xdp_sq(vi); |
|---|
| 1540 | + sq = virtnet_xdp_get_sq(vi); |
|---|
| 1468 | 1541 | if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { |
|---|
| 1469 | 1542 | u64_stats_update_begin(&sq->stats.syncp); |
|---|
| 1470 | 1543 | sq->stats.kicks++; |
|---|
| 1471 | 1544 | u64_stats_update_end(&sq->stats.syncp); |
|---|
| 1472 | 1545 | } |
|---|
| 1546 | + virtnet_xdp_put_sq(vi, sq); |
|---|
| 1473 | 1547 | } |
|---|
| 1474 | 1548 | |
|---|
| 1475 | 1549 | return received; |
|---|
| .. | .. |
|---|
| 1479 | 1553 | { |
|---|
| 1480 | 1554 | struct virtnet_info *vi = netdev_priv(dev); |
|---|
| 1481 | 1555 | int i, err; |
|---|
| 1556 | + |
|---|
| 1557 | + enable_delayed_refill(vi); |
|---|
| 1482 | 1558 | |
|---|
| 1483 | 1559 | for (i = 0; i < vi->max_queue_pairs; i++) { |
|---|
| 1484 | 1560 | if (i < vi->curr_queue_pairs) |
|---|
| .. | .. |
|---|
| 1604 | 1680 | struct send_queue *sq = &vi->sq[qnum]; |
|---|
| 1605 | 1681 | int err; |
|---|
| 1606 | 1682 | struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); |
|---|
| 1607 | | - bool kick = !skb->xmit_more; |
|---|
| 1683 | + bool kick = !netdev_xmit_more(); |
|---|
| 1608 | 1684 | bool use_napi = sq->napi.weight; |
|---|
| 1609 | 1685 | |
|---|
| 1610 | 1686 | /* Free up any pending old buffers before queueing new ones. */ |
|---|
| .. | .. |
|---|
| 1624 | 1700 | dev->stats.tx_fifo_errors++; |
|---|
| 1625 | 1701 | if (net_ratelimit()) |
|---|
| 1626 | 1702 | dev_warn(&dev->dev, |
|---|
| 1627 | | - "Unexpected TXQ (%d) queue failure: %d\n", qnum, err); |
|---|
| 1703 | + "Unexpected TXQ (%d) queue failure: %d\n", |
|---|
| 1704 | + qnum, err); |
|---|
| 1628 | 1705 | dev->stats.tx_dropped++; |
|---|
| 1629 | 1706 | dev_kfree_skb_any(skb); |
|---|
| 1630 | 1707 | return NETDEV_TX_OK; |
|---|
| .. | .. |
|---|
| 1633 | 1710 | /* Don't wait up for transmitted skbs to be freed. */ |
|---|
| 1634 | 1711 | if (!use_napi) { |
|---|
| 1635 | 1712 | skb_orphan(skb); |
|---|
| 1636 | | - nf_reset(skb); |
|---|
| 1713 | + nf_reset_ct(skb); |
|---|
| 1637 | 1714 | } |
|---|
| 1638 | 1715 | |
|---|
| 1639 | 1716 | /* If running out of space, stop queue to avoid getting packets that we |
|---|
| .. | .. |
|---|
| 1849 | 1926 | struct virtnet_info *vi = netdev_priv(dev); |
|---|
| 1850 | 1927 | int i; |
|---|
| 1851 | 1928 | |
|---|
| 1929 | + /* Make sure NAPI doesn't schedule refill work */ |
|---|
| 1930 | + disable_delayed_refill(vi); |
|---|
| 1852 | 1931 | /* Make sure refill_work doesn't re-enable napi! */ |
|---|
| 1853 | 1932 | cancel_delayed_work_sync(&vi->refill); |
|---|
| 1854 | 1933 | |
|---|
| 1855 | 1934 | for (i = 0; i < vi->max_queue_pairs; i++) { |
|---|
| 1856 | | - xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq); |
|---|
| 1857 | 1935 | napi_disable(&vi->rq[i].napi); |
|---|
| 1936 | + xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq); |
|---|
| 1858 | 1937 | virtnet_napi_tx_disable(&vi->sq[i].napi); |
|---|
| 1859 | 1938 | } |
|---|
| 1860 | 1939 | |
|---|
| .. | .. |
|---|
| 1961 | 2040 | return 0; |
|---|
| 1962 | 2041 | } |
|---|
| 1963 | 2042 | |
|---|
| 1964 | | -static void virtnet_clean_affinity(struct virtnet_info *vi, long hcpu) |
|---|
| 2043 | +static void virtnet_clean_affinity(struct virtnet_info *vi) |
|---|
| 1965 | 2044 | { |
|---|
| 1966 | 2045 | int i; |
|---|
| 1967 | 2046 | |
|---|
| .. | .. |
|---|
| 1985 | 2064 | int stride; |
|---|
| 1986 | 2065 | |
|---|
| 1987 | 2066 | if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { |
|---|
| 1988 | | - virtnet_clean_affinity(vi, -1); |
|---|
| 2067 | + virtnet_clean_affinity(vi); |
|---|
| 1989 | 2068 | return; |
|---|
| 1990 | 2069 | } |
|---|
| 1991 | 2070 | |
|---|
| .. | .. |
|---|
| 2035 | 2114 | struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, |
|---|
| 2036 | 2115 | node); |
|---|
| 2037 | 2116 | |
|---|
| 2038 | | - virtnet_clean_affinity(vi, cpu); |
|---|
| 2117 | + virtnet_clean_affinity(vi); |
|---|
| 2039 | 2118 | return 0; |
|---|
| 2040 | 2119 | } |
|---|
| 2041 | 2120 | |
|---|
| .. | .. |
|---|
| 2216 | 2295 | channels->other_count = 0; |
|---|
| 2217 | 2296 | } |
|---|
| 2218 | 2297 | |
|---|
| 2219 | | -/* Check if the user is trying to change anything besides speed/duplex */ |
|---|
| 2220 | | -static bool |
|---|
| 2221 | | -virtnet_validate_ethtool_cmd(const struct ethtool_link_ksettings *cmd) |
|---|
| 2222 | | -{ |
|---|
| 2223 | | - struct ethtool_link_ksettings diff1 = *cmd; |
|---|
| 2224 | | - struct ethtool_link_ksettings diff2 = {}; |
|---|
| 2225 | | - |
|---|
| 2226 | | - /* cmd is always set so we need to clear it, validate the port type |
|---|
| 2227 | | - * and also without autonegotiation we can ignore advertising |
|---|
| 2228 | | - */ |
|---|
| 2229 | | - diff1.base.speed = 0; |
|---|
| 2230 | | - diff2.base.port = PORT_OTHER; |
|---|
| 2231 | | - ethtool_link_ksettings_zero_link_mode(&diff1, advertising); |
|---|
| 2232 | | - diff1.base.duplex = 0; |
|---|
| 2233 | | - diff1.base.cmd = 0; |
|---|
| 2234 | | - diff1.base.link_mode_masks_nwords = 0; |
|---|
| 2235 | | - |
|---|
| 2236 | | - return !memcmp(&diff1.base, &diff2.base, sizeof(diff1.base)) && |
|---|
| 2237 | | - bitmap_empty(diff1.link_modes.supported, |
|---|
| 2238 | | - __ETHTOOL_LINK_MODE_MASK_NBITS) && |
|---|
| 2239 | | - bitmap_empty(diff1.link_modes.advertising, |
|---|
| 2240 | | - __ETHTOOL_LINK_MODE_MASK_NBITS) && |
|---|
| 2241 | | - bitmap_empty(diff1.link_modes.lp_advertising, |
|---|
| 2242 | | - __ETHTOOL_LINK_MODE_MASK_NBITS); |
|---|
| 2243 | | -} |
|---|
| 2244 | | - |
|---|
| 2245 | 2298 | static int virtnet_set_link_ksettings(struct net_device *dev, |
|---|
| 2246 | 2299 | const struct ethtool_link_ksettings *cmd) |
|---|
| 2247 | 2300 | { |
|---|
| 2248 | 2301 | struct virtnet_info *vi = netdev_priv(dev); |
|---|
| 2249 | | - u32 speed; |
|---|
| 2250 | 2302 | |
|---|
| 2251 | | - speed = cmd->base.speed; |
|---|
| 2252 | | - /* don't allow custom speed and duplex */ |
|---|
| 2253 | | - if (!ethtool_validate_speed(speed) || |
|---|
| 2254 | | - !ethtool_validate_duplex(cmd->base.duplex) || |
|---|
| 2255 | | - !virtnet_validate_ethtool_cmd(cmd)) |
|---|
| 2256 | | - return -EINVAL; |
|---|
| 2257 | | - vi->speed = speed; |
|---|
| 2258 | | - vi->duplex = cmd->base.duplex; |
|---|
| 2259 | | - |
|---|
| 2260 | | - return 0; |
|---|
| 2303 | + return ethtool_virtdev_set_link_ksettings(dev, cmd, |
|---|
| 2304 | + &vi->speed, &vi->duplex); |
|---|
| 2261 | 2305 | } |
|---|
| 2262 | 2306 | |
|---|
| 2263 | 2307 | static int virtnet_get_link_ksettings(struct net_device *dev, |
|---|
| .. | .. |
|---|
| 2268 | 2312 | cmd->base.speed = vi->speed; |
|---|
| 2269 | 2313 | cmd->base.duplex = vi->duplex; |
|---|
| 2270 | 2314 | cmd->base.port = PORT_OTHER; |
|---|
| 2315 | + |
|---|
| 2316 | + return 0; |
|---|
| 2317 | +} |
|---|
| 2318 | + |
|---|
| 2319 | +static int virtnet_set_coalesce(struct net_device *dev, |
|---|
| 2320 | + struct ethtool_coalesce *ec) |
|---|
| 2321 | +{ |
|---|
| 2322 | + struct virtnet_info *vi = netdev_priv(dev); |
|---|
| 2323 | + int i, napi_weight; |
|---|
| 2324 | + |
|---|
| 2325 | + if (ec->tx_max_coalesced_frames > 1 || |
|---|
| 2326 | + ec->rx_max_coalesced_frames != 1) |
|---|
| 2327 | + return -EINVAL; |
|---|
| 2328 | + |
|---|
| 2329 | + napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; |
|---|
| 2330 | + if (napi_weight ^ vi->sq[0].napi.weight) { |
|---|
| 2331 | + if (dev->flags & IFF_UP) |
|---|
| 2332 | + return -EBUSY; |
|---|
| 2333 | + for (i = 0; i < vi->max_queue_pairs; i++) |
|---|
| 2334 | + vi->sq[i].napi.weight = napi_weight; |
|---|
| 2335 | + } |
|---|
| 2336 | + |
|---|
| 2337 | + return 0; |
|---|
| 2338 | +} |
|---|
| 2339 | + |
|---|
| 2340 | +static int virtnet_get_coalesce(struct net_device *dev, |
|---|
| 2341 | + struct ethtool_coalesce *ec) |
|---|
| 2342 | +{ |
|---|
| 2343 | + struct ethtool_coalesce ec_default = { |
|---|
| 2344 | + .cmd = ETHTOOL_GCOALESCE, |
|---|
| 2345 | + .rx_max_coalesced_frames = 1, |
|---|
| 2346 | + }; |
|---|
| 2347 | + struct virtnet_info *vi = netdev_priv(dev); |
|---|
| 2348 | + |
|---|
| 2349 | + memcpy(ec, &ec_default, sizeof(ec_default)); |
|---|
| 2350 | + |
|---|
| 2351 | + if (vi->sq[0].napi.weight) |
|---|
| 2352 | + ec->tx_max_coalesced_frames = 1; |
|---|
| 2271 | 2353 | |
|---|
| 2272 | 2354 | return 0; |
|---|
| 2273 | 2355 | } |
|---|
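The coalesce hooks above expose tx NAPI through tx_max_coalesced_frames: 0 disables napi_tx, 1 enables it with NAPI_POLL_WEIGHT, anything larger is rejected, and the weight cannot change while the interface is up. A standalone sketch of the tx-side mapping (rx_max_coalesced_frames handling is omitted; error values mirror the driver's):

```c
/* Standalone sketch: models how virtnet_set_coalesce() maps
 * tx_max_coalesced_frames onto the tx NAPI weight. */
#include <errno.h>
#include <stdio.h>

#define NAPI_POLL_WEIGHT 64

static int set_tx_coalesce(unsigned int tx_max_frames, int *napi_weight,
			   int if_up)
{
	int want;

	if (tx_max_frames > 1)
		return -EINVAL;          /* only 0 or 1 is representable */

	want = tx_max_frames ? NAPI_POLL_WEIGHT : 0;
	if (want != *napi_weight) {
		if (if_up)
			return -EBUSY;   /* can't flip while queues run */
		*napi_weight = want;
	}
	return 0;
}

int main(void)
{
	int weight = NAPI_POLL_WEIGHT;

	printf("%d\n", set_tx_coalesce(0, &weight, 1)); /* -EBUSY while up */
	printf("%d\n", set_tx_coalesce(0, &weight, 0)); /* 0: napi_tx off */
	printf("weight=%d\n", weight);
	return 0;
}
```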
| .. | .. |
|---|
| 2288 | 2370 | if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX)) |
|---|
| 2289 | 2371 | return; |
|---|
| 2290 | 2372 | |
|---|
| 2291 | | - speed = virtio_cread32(vi->vdev, offsetof(struct virtio_net_config, |
|---|
| 2292 | | - speed)); |
|---|
| 2373 | + virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed); |
|---|
| 2374 | + |
|---|
| 2293 | 2375 | if (ethtool_validate_speed(speed)) |
|---|
| 2294 | 2376 | vi->speed = speed; |
|---|
| 2295 | | - duplex = virtio_cread8(vi->vdev, offsetof(struct virtio_net_config, |
|---|
| 2296 | | - duplex)); |
|---|
| 2377 | + |
|---|
| 2378 | + virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, &duplex); |
|---|
| 2379 | + |
|---|
| 2297 | 2380 | if (ethtool_validate_duplex(duplex)) |
|---|
| 2298 | 2381 | vi->duplex = duplex; |
|---|
| 2299 | 2382 | } |
|---|
| 2300 | 2383 | |
|---|
| 2301 | 2384 | static const struct ethtool_ops virtnet_ethtool_ops = { |
|---|
| 2385 | + .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES, |
|---|
| 2302 | 2386 | .get_drvinfo = virtnet_get_drvinfo, |
|---|
| 2303 | 2387 | .get_link = ethtool_op_get_link, |
|---|
| 2304 | 2388 | .get_ringparam = virtnet_get_ringparam, |
|---|
| .. | .. |
|---|
| 2310 | 2394 | .get_ts_info = ethtool_op_get_ts_info, |
|---|
| 2311 | 2395 | .get_link_ksettings = virtnet_get_link_ksettings, |
|---|
| 2312 | 2396 | .set_link_ksettings = virtnet_set_link_ksettings, |
|---|
| 2397 | + .set_coalesce = virtnet_set_coalesce, |
|---|
| 2398 | + .get_coalesce = virtnet_get_coalesce, |
|---|
| 2313 | 2399 | }; |
|---|
| 2314 | 2400 | |
|---|
| 2315 | 2401 | static void virtnet_freeze_down(struct virtio_device *vdev) |
|---|
| 2316 | 2402 | { |
|---|
| 2317 | 2403 | struct virtnet_info *vi = vdev->priv; |
|---|
| 2318 | | - int i; |
|---|
| 2319 | 2404 | |
|---|
| 2320 | 2405 | /* Make sure no work handler is accessing the device */ |
|---|
| 2321 | 2406 | flush_work(&vi->config_work); |
|---|
| .. | .. |
|---|
| 2323 | 2408 | netif_tx_lock_bh(vi->dev); |
|---|
| 2324 | 2409 | netif_device_detach(vi->dev); |
|---|
| 2325 | 2410 | netif_tx_unlock_bh(vi->dev); |
|---|
| 2326 | | - cancel_delayed_work_sync(&vi->refill); |
|---|
| 2327 | | - |
|---|
| 2328 | | - if (netif_running(vi->dev)) { |
|---|
| 2329 | | - for (i = 0; i < vi->max_queue_pairs; i++) { |
|---|
| 2330 | | - napi_disable(&vi->rq[i].napi); |
|---|
| 2331 | | - virtnet_napi_tx_disable(&vi->sq[i].napi); |
|---|
| 2332 | | - } |
|---|
| 2333 | | - } |
|---|
| 2411 | + if (netif_running(vi->dev)) |
|---|
| 2412 | + virtnet_close(vi->dev); |
|---|
| 2334 | 2413 | } |
|---|
| 2335 | 2414 | |
|---|
| 2336 | 2415 | static int init_vqs(struct virtnet_info *vi); |
|---|
| .. | .. |
|---|
| 2338 | 2417 | static int virtnet_restore_up(struct virtio_device *vdev) |
|---|
| 2339 | 2418 | { |
|---|
| 2340 | 2419 | struct virtnet_info *vi = vdev->priv; |
|---|
| 2341 | | - int err, i; |
|---|
| 2420 | + int err; |
|---|
| 2342 | 2421 | |
|---|
| 2343 | 2422 | err = init_vqs(vi); |
|---|
| 2344 | 2423 | if (err) |
|---|
| .. | .. |
|---|
| 2346 | 2425 | |
|---|
| 2347 | 2426 | virtio_device_ready(vdev); |
|---|
| 2348 | 2427 | |
|---|
| 2349 | | - if (netif_running(vi->dev)) { |
|---|
| 2350 | | - for (i = 0; i < vi->curr_queue_pairs; i++) |
|---|
| 2351 | | - if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL)) |
|---|
| 2352 | | - schedule_delayed_work(&vi->refill, 0); |
|---|
| 2428 | + enable_delayed_refill(vi); |
|---|
| 2353 | 2429 | |
|---|
| 2354 | | - for (i = 0; i < vi->max_queue_pairs; i++) { |
|---|
| 2355 | | - virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); |
|---|
| 2356 | | - virtnet_napi_tx_enable(vi, vi->sq[i].vq, |
|---|
| 2357 | | - &vi->sq[i].napi); |
|---|
| 2358 | | - } |
|---|
| 2430 | + if (netif_running(vi->dev)) { |
|---|
| 2431 | + err = virtnet_open(vi->dev); |
|---|
| 2432 | + if (err) |
|---|
| 2433 | + return err; |
|---|
| 2359 | 2434 | } |
|---|
| 2360 | 2435 | |
|---|
| 2361 | 2436 | netif_tx_lock_bh(vi->dev); |
|---|
| .. | .. |
|---|
| 2373 | 2448 | |
|---|
| 2374 | 2449 | if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS, |
|---|
| 2375 | 2450 | VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) { |
|---|
| 2376 | | - dev_warn(&vi->dev->dev, "Fail to set guest offload. \n"); |
|---|
| 2451 | + dev_warn(&vi->dev->dev, "Fail to set guest offload.\n"); |
|---|
| 2377 | 2452 | return -EINVAL; |
|---|
| 2378 | 2453 | } |
|---|
| 2379 | 2454 | |
|---|
| .. | .. |
|---|
| 2415 | 2490 | virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || |
|---|
| 2416 | 2491 | virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || |
|---|
| 2417 | 2492 | virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))) { |
|---|
| 2418 | | - NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO/CSUM, disable LRO/CSUM first"); |
|---|
| 2493 | + NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first"); |
|---|
| 2419 | 2494 | return -EOPNOTSUPP; |
|---|
| 2420 | 2495 | } |
|---|
| 2421 | 2496 | |
|---|
| .. | .. |
|---|
| 2436 | 2511 | |
|---|
| 2437 | 2512 | /* XDP requires extra queues for XDP_TX */ |
|---|
| 2438 | 2513 | if (curr_qp + xdp_qp > vi->max_queue_pairs) { |
|---|
| 2439 | | - NL_SET_ERR_MSG_MOD(extack, "Too few free TX rings available"); |
|---|
| 2440 | | - netdev_warn(dev, "request %i queues but max is %i\n", |
|---|
| 2514 | + netdev_warn(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n", |
|---|
| 2441 | 2515 | curr_qp + xdp_qp, vi->max_queue_pairs); |
|---|
| 2442 | | - return -ENOMEM; |
|---|
| 2516 | + xdp_qp = 0; |
|---|
| 2443 | 2517 | } |
|---|
| 2444 | 2518 | |
|---|
| 2445 | 2519 | old_prog = rtnl_dereference(vi->rq[0].xdp_prog); |
|---|
| 2446 | 2520 | if (!prog && !old_prog) |
|---|
| 2447 | 2521 | return 0; |
|---|
| 2448 | 2522 | |
|---|
| 2449 | | - if (prog) { |
|---|
| 2450 | | - prog = bpf_prog_add(prog, vi->max_queue_pairs - 1); |
|---|
| 2451 | | - if (IS_ERR(prog)) |
|---|
| 2452 | | - return PTR_ERR(prog); |
|---|
| 2453 | | - } |
|---|
| 2523 | + if (prog) |
|---|
| 2524 | + bpf_prog_add(prog, vi->max_queue_pairs - 1); |
|---|
| 2454 | 2525 | |
|---|
| 2455 | 2526 | /* Make sure NAPI is not using any XDP TX queues for RX. */ |
|---|
| 2456 | 2527 | if (netif_running(dev)) { |
|---|
| .. | .. |
|---|
| 2476 | 2547 | vi->xdp_queue_pairs = xdp_qp; |
|---|
| 2477 | 2548 | |
|---|
| 2478 | 2549 | if (prog) { |
|---|
| 2550 | + vi->xdp_enabled = true; |
|---|
| 2479 | 2551 | for (i = 0; i < vi->max_queue_pairs; i++) { |
|---|
| 2480 | 2552 | rcu_assign_pointer(vi->rq[i].xdp_prog, prog); |
|---|
| 2481 | 2553 | if (i == 0 && !old_prog) |
|---|
| 2482 | 2554 | virtnet_clear_guest_offloads(vi); |
|---|
| 2483 | 2555 | } |
|---|
| 2556 | + } else { |
|---|
| 2557 | + vi->xdp_enabled = false; |
|---|
| 2484 | 2558 | } |
|---|
| 2485 | 2559 | |
|---|
| 2486 | 2560 | for (i = 0; i < vi->max_queue_pairs; i++) { |
|---|
| .. | .. |
|---|
| 2514 | 2588 | return err; |
|---|
| 2515 | 2589 | } |
|---|
| 2516 | 2590 | |
|---|
| 2517 | | -static u32 virtnet_xdp_query(struct net_device *dev) |
|---|
| 2518 | | -{ |
|---|
| 2519 | | - struct virtnet_info *vi = netdev_priv(dev); |
|---|
| 2520 | | - const struct bpf_prog *xdp_prog; |
|---|
| 2521 | | - int i; |
|---|
| 2522 | | - |
|---|
| 2523 | | - for (i = 0; i < vi->max_queue_pairs; i++) { |
|---|
| 2524 | | - xdp_prog = rtnl_dereference(vi->rq[i].xdp_prog); |
|---|
| 2525 | | - if (xdp_prog) |
|---|
| 2526 | | - return xdp_prog->aux->id; |
|---|
| 2527 | | - } |
|---|
| 2528 | | - return 0; |
|---|
| 2529 | | -} |
|---|
| 2530 | | - |
|---|
| 2531 | 2591 | static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp) |
|---|
| 2532 | 2592 | { |
|---|
| 2533 | 2593 | switch (xdp->command) { |
|---|
| 2534 | 2594 | case XDP_SETUP_PROG: |
|---|
| 2535 | 2595 | return virtnet_xdp_set(dev, xdp->prog, xdp->extack); |
|---|
| 2536 | | - case XDP_QUERY_PROG: |
|---|
| 2537 | | - xdp->prog_id = virtnet_xdp_query(dev); |
|---|
| 2538 | | - return 0; |
|---|
| 2539 | 2596 | default: |
|---|
| 2540 | 2597 | return -EINVAL; |
|---|
| 2541 | 2598 | } |
|---|
| .. | .. |
|---|
| 2557 | 2614 | return 0; |
|---|
| 2558 | 2615 | } |
|---|
| 2559 | 2616 | |
|---|
| 2617 | +static int virtnet_set_features(struct net_device *dev, |
|---|
| 2618 | + netdev_features_t features) |
|---|
| 2619 | +{ |
|---|
| 2620 | + struct virtnet_info *vi = netdev_priv(dev); |
|---|
| 2621 | + u64 offloads; |
|---|
| 2622 | + int err; |
|---|
| 2623 | + |
|---|
| 2624 | + if (!vi->has_cvq) |
|---|
| 2625 | + return 0; |
|---|
| 2626 | + |
|---|
| 2627 | + if ((dev->features ^ features) & NETIF_F_GRO_HW) { |
|---|
| 2628 | + if (vi->xdp_enabled) |
|---|
| 2629 | + return -EBUSY; |
|---|
| 2630 | + |
|---|
| 2631 | + if (features & NETIF_F_GRO_HW) |
|---|
| 2632 | + offloads = vi->guest_offloads_capable; |
|---|
| 2633 | + else |
|---|
| 2634 | + offloads = vi->guest_offloads_capable & |
|---|
| 2635 | + ~GUEST_OFFLOAD_GRO_HW_MASK; |
|---|
| 2636 | + |
|---|
| 2637 | + err = virtnet_set_guest_offloads(vi, offloads); |
|---|
| 2638 | + if (err) |
|---|
| 2639 | + return err; |
|---|
| 2640 | + vi->guest_offloads = offloads; |
|---|
| 2641 | + } |
|---|
| 2642 | + |
|---|
| 2643 | + return 0; |
|---|
| 2644 | +} |
|---|
| 2645 | + |
|---|
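virtnet_set_features() above lets ethtool toggle NETIF_F_GRO_HW by reprogramming the guest offloads, and refuses while XDP is attached. A standalone model of that decision flow (netdev_features_t reduced to a plain bitmask; the NETIF_F_GRO_HW bit value and the commit step are illustrative — the real driver calls virtnet_set_guest_offloads() and lets the core update dev->features):

```c
/* Standalone sketch: the NETIF_F_GRO_HW branch of virtnet_set_features(),
 * with features as a plain bitmask. Bit values are illustrative. */
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NETIF_F_GRO_HW  (1ULL << 0)  /* illustrative, not the kernel's value */
#define GRO_HW_OFFLOADS 0x780ULL     /* TSO4|TSO6|ECN|UFO, virtio bits 7-10 */

struct dev {
	uint64_t features;
	uint64_t guest_offloads, guest_offloads_capable;
	bool has_cvq, xdp_enabled;
};

static int set_features(struct dev *d, uint64_t features)
{
	uint64_t offloads;

	if (!d->has_cvq)
		return 0;                 /* nothing negotiable without ctrl vq */

	if ((d->features ^ features) & NETIF_F_GRO_HW) {
		if (d->xdp_enabled)
			return -EBUSY;    /* XDP owns the offload state */

		if (features & NETIF_F_GRO_HW)
			offloads = d->guest_offloads_capable;
		else
			offloads = d->guest_offloads_capable & ~GRO_HW_OFFLOADS;

		/* real driver: virtnet_set_guest_offloads(vi, offloads) */
		d->guest_offloads = offloads;
	}
	d->features = features;           /* the core does this in the kernel */
	return 0;
}

int main(void)
{
	struct dev d = { .features = NETIF_F_GRO_HW, .has_cvq = true,
			 .guest_offloads = 0x782,
			 .guest_offloads_capable = 0x782 };

	printf("%d offloads=%#llx\n", set_features(&d, 0),
	       (unsigned long long)d.guest_offloads);
	return 0;
}
```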
| 2560 | 2646 | static const struct net_device_ops virtnet_netdev = { |
|---|
| 2561 | 2647 | .ndo_open = virtnet_open, |
|---|
| 2562 | 2648 | .ndo_stop = virtnet_close, |
|---|
| .. | .. |
|---|
| 2571 | 2657 | .ndo_xdp_xmit = virtnet_xdp_xmit, |
|---|
| 2572 | 2658 | .ndo_features_check = passthru_features_check, |
|---|
| 2573 | 2659 | .ndo_get_phys_port_name = virtnet_get_phys_port_name, |
|---|
| 2660 | + .ndo_set_features = virtnet_set_features, |
|---|
| 2574 | 2661 | }; |
|---|
| 2575 | 2662 | |
|---|
| 2576 | 2663 | static void virtnet_config_changed_work(struct work_struct *work) |
|---|
| .. | .. |
|---|
| 2618 | 2705 | int i; |
|---|
| 2619 | 2706 | |
|---|
| 2620 | 2707 | for (i = 0; i < vi->max_queue_pairs; i++) { |
|---|
| 2621 | | - napi_hash_del(&vi->rq[i].napi); |
|---|
| 2622 | | - netif_napi_del(&vi->rq[i].napi); |
|---|
| 2623 | | - netif_napi_del(&vi->sq[i].napi); |
|---|
| 2708 | + __netif_napi_del(&vi->rq[i].napi); |
|---|
| 2709 | + __netif_napi_del(&vi->sq[i].napi); |
|---|
| 2624 | 2710 | } |
|---|
| 2625 | 2711 | |
|---|
| 2626 | | - /* We called napi_hash_del() before netif_napi_del(), |
|---|
| 2712 | + /* We called __netif_napi_del(), |
|---|
| 2627 | 2713 | * we need to respect an RCU grace period before freeing vi->rq |
|---|
| 2628 | 2714 | */ |
|---|
| 2629 | 2715 | synchronize_net(); |
|---|
| .. | .. |
|---|
| 2664 | 2750 | put_page(vi->rq[i].alloc_frag.page); |
|---|
| 2665 | 2751 | } |
|---|
| 2666 | 2752 | |
|---|
| 2753 | +static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf) |
|---|
| 2754 | +{ |
|---|
| 2755 | + if (!is_xdp_frame(buf)) |
|---|
| 2756 | + dev_kfree_skb(buf); |
|---|
| 2757 | + else |
|---|
| 2758 | + xdp_return_frame(ptr_to_xdp(buf)); |
|---|
| 2759 | +} |
|---|
| 2760 | + |
|---|
| 2761 | +static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf) |
|---|
| 2762 | +{ |
|---|
| 2763 | + struct virtnet_info *vi = vq->vdev->priv; |
|---|
| 2764 | + int i = vq2rxq(vq); |
|---|
| 2765 | + |
|---|
| 2766 | + if (vi->mergeable_rx_bufs) |
|---|
| 2767 | + put_page(virt_to_head_page(buf)); |
|---|
| 2768 | + else if (vi->big_packets) |
|---|
| 2769 | + give_pages(&vi->rq[i], buf); |
|---|
| 2770 | + else |
|---|
| 2771 | + put_page(virt_to_head_page(buf)); |
|---|
| 2772 | +} |
|---|
| 2773 | + |
|---|
| 2667 | 2774 | static void free_unused_bufs(struct virtnet_info *vi) |
|---|
| 2668 | 2775 | { |
|---|
| 2669 | 2776 | void *buf; |
|---|
| .. | .. |
|---|
| 2671 | 2778 | |
|---|
| 2672 | 2779 | for (i = 0; i < vi->max_queue_pairs; i++) { |
|---|
| 2673 | 2780 | struct virtqueue *vq = vi->sq[i].vq; |
|---|
| 2674 | | - while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) { |
|---|
| 2675 | | - if (!is_xdp_frame(buf)) |
|---|
| 2676 | | - dev_kfree_skb(buf); |
|---|
| 2677 | | - else |
|---|
| 2678 | | - xdp_return_frame(ptr_to_xdp(buf)); |
|---|
| 2679 | | - } |
|---|
| 2781 | + while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) |
|---|
| 2782 | + virtnet_sq_free_unused_buf(vq, buf); |
|---|
| 2783 | + cond_resched(); |
|---|
| 2680 | 2784 | } |
|---|
| 2681 | 2785 | |
|---|
| 2682 | 2786 | for (i = 0; i < vi->max_queue_pairs; i++) { |
|---|
| 2683 | 2787 | struct virtqueue *vq = vi->rq[i].vq; |
|---|
| 2684 | | - |
|---|
| 2685 | | - while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) { |
|---|
| 2686 | | - if (vi->mergeable_rx_bufs) { |
|---|
| 2687 | | - put_page(virt_to_head_page(buf)); |
|---|
| 2688 | | - } else if (vi->big_packets) { |
|---|
| 2689 | | - give_pages(&vi->rq[i], buf); |
|---|
| 2690 | | - } else { |
|---|
| 2691 | | - put_page(virt_to_head_page(buf)); |
|---|
| 2692 | | - } |
|---|
| 2693 | | - } |
|---|
| 2788 | + while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) |
|---|
| 2789 | + virtnet_rq_free_unused_buf(vq, buf); |
|---|
| 2790 | + cond_resched(); |
|---|
| 2694 | 2791 | } |
|---|
| 2695 | 2792 | } |
|---|
| 2696 | 2793 | |
|---|
| .. | .. |
|---|
| 2698 | 2795 | { |
|---|
| 2699 | 2796 | struct virtio_device *vdev = vi->vdev; |
|---|
| 2700 | 2797 | |
|---|
| 2701 | | - virtnet_clean_affinity(vi, -1); |
|---|
| 2798 | + virtnet_clean_affinity(vi); |
|---|
| 2702 | 2799 | |
|---|
| 2703 | 2800 | vdev->config->del_vqs(vdev); |
|---|
| 2704 | 2801 | |
|---|
| .. | .. |
|---|
| 3019 | 3116 | } |
|---|
| 3020 | 3117 | if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM)) |
|---|
| 3021 | 3118 | dev->features |= NETIF_F_RXCSUM; |
|---|
| 3119 | + if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || |
|---|
| 3120 | + virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)) |
|---|
| 3121 | + dev->features |= NETIF_F_GRO_HW; |
|---|
| 3122 | + if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) |
|---|
| 3123 | + dev->hw_features |= NETIF_F_GRO_HW; |
|---|
| 3022 | 3124 | |
|---|
| 3023 | 3125 | dev->vlan_features = dev->features; |
|---|
| 3024 | 3126 | |
|---|
| .. | .. |
|---|
| 3041 | 3143 | vdev->priv = vi; |
|---|
| 3042 | 3144 | |
|---|
| 3043 | 3145 | INIT_WORK(&vi->config_work, virtnet_config_changed_work); |
|---|
| 3146 | + spin_lock_init(&vi->refill_lock); |
|---|
| 3044 | 3147 | |
|---|
| 3045 | 3148 | /* If we can receive ANY GSO packets, we must allocate large ones. */ |
|---|
| 3046 | 3149 | if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || |
|---|
| .. | .. |
|---|
| 3073 | 3176 | /* Should never trigger: MTU was previously validated |
|---|
| 3074 | 3177 | * in virtnet_validate. |
|---|
| 3075 | 3178 | */ |
|---|
| 3076 | | - dev_err(&vdev->dev, "device MTU appears to have changed " |
|---|
| 3077 | | - "it is now %d < %d", mtu, dev->min_mtu); |
|---|
| 3179 | + dev_err(&vdev->dev, |
|---|
| 3180 | + "device MTU appears to have changed it is now %d < %d", |
|---|
| 3181 | + mtu, dev->min_mtu); |
|---|
| 3182 | + err = -EINVAL; |
|---|
| 3078 | 3183 | goto free; |
|---|
| 3079 | 3184 | } |
|---|
| 3080 | 3185 | |
|---|
| .. | .. |
|---|
| 3118 | 3223 | } |
|---|
| 3119 | 3224 | } |
|---|
| 3120 | 3225 | |
|---|
| 3121 | | - err = register_netdev(dev); |
|---|
| 3226 | + /* serialize netdev register + virtio_device_ready() with ndo_open() */ |
|---|
| 3227 | + rtnl_lock(); |
|---|
| 3228 | + |
|---|
| 3229 | + err = register_netdevice(dev); |
|---|
| 3122 | 3230 | if (err) { |
|---|
| 3123 | 3231 | pr_debug("virtio_net: registering device failed\n"); |
|---|
| 3232 | + rtnl_unlock(); |
|---|
| 3124 | 3233 | goto free_failover; |
|---|
| 3125 | 3234 | } |
|---|
| 3126 | 3235 | |
|---|
| 3127 | 3236 | virtio_device_ready(vdev); |
|---|
| 3237 | + |
|---|
| 3238 | + _virtnet_set_queues(vi, vi->curr_queue_pairs); |
|---|
| 3239 | + |
|---|
| 3240 | + rtnl_unlock(); |
|---|
| 3128 | 3241 | |
|---|
| 3129 | 3242 | err = virtnet_cpu_notif_add(vi); |
|---|
| 3130 | 3243 | if (err) { |
|---|
| 3131 | 3244 | pr_debug("virtio_net: registering cpu notifier failed\n"); |
|---|
| 3132 | 3245 | goto free_unregister_netdev; |
|---|
| 3133 | 3246 | } |
|---|
| 3134 | | - |
|---|
| 3135 | | - virtnet_set_queues(vi, vi->curr_queue_pairs); |
|---|
| 3136 | 3247 | |
|---|
| 3137 | 3248 | /* Assume link up if device can't report link status, |
|---|
| 3138 | 3249 | otherwise get link status from config. */ |
|---|
| .. | .. |
|---|
| 3148 | 3259 | for (i = 0; i < ARRAY_SIZE(guest_offloads); i++) |
|---|
| 3149 | 3260 | if (virtio_has_feature(vi->vdev, guest_offloads[i])) |
|---|
| 3150 | 3261 | set_bit(guest_offloads[i], &vi->guest_offloads); |
|---|
| 3262 | + vi->guest_offloads_capable = vi->guest_offloads; |
|---|
| 3151 | 3263 | |
|---|
| 3152 | 3264 | pr_debug("virtnet: registered device %s with %d RX and TX vq's\n", |
|---|
| 3153 | 3265 | dev->name, max_queue_pairs); |
|---|