.. | ..
44 | 44 | #include <linux/mm.h>
45 | 45 | #include <linux/slab.h>
46 | 46 | #include <net/ip.h>
| 47 | +#include <linux/bpf.h>
| 48 | +#include <net/page_pool.h>
| 49 | +#include <linux/bpf_trace.h>
47 | 50 |
48 | 51 | #include <xen/xen.h>
49 | 52 | #include <xen/xenbus.h>
.. | ..
62 | 65 | module_param_named(max_queues, xennet_max_queues, uint, 0644);
63 | 66 | MODULE_PARM_DESC(max_queues,
64 | 67 | "Maximum number of queues per virtual interface");
| 68 | +
| 69 | +static bool __read_mostly xennet_trusted = true;
| 70 | +module_param_named(trusted, xennet_trusted, bool, 0644);
| 71 | +MODULE_PARM_DESC(trusted, "Is the backend trusted");
65 | 72 |
66 | 73 | #define XENNET_TIMEOUT (5 * HZ)
67 | 74 |
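The new `trusted` knob is the standard boolean module parameter pattern: a `__read_mostly` flag settable at load time (`xen_netfront.trusted=0`) or later through `/sys/module/xen_netfront/parameters/trusted`. A minimal self-contained sketch of that pattern, with a hypothetical `demo` module name:

```c
#include <linux/module.h>

/* Hypothetical module showing the boolean-parameter pattern used in
 * the hunk above; mode 0644 exposes it read/write in
 * /sys/module/demo/parameters/trusted.
 */
static bool __read_mostly demo_trusted = true;
module_param_named(trusted, demo_trusted, bool, 0644);
MODULE_PARM_DESC(trusted, "Is the peer trusted");

static int __init demo_init(void)
{
	pr_info("demo: mitigations %s\n",
		demo_trusted ? "off (peer trusted)" : "on");
	return 0;
}
module_init(demo_init);
MODULE_LICENSE("GPL");
```

A later hunk additionally lets the per-device xenstore node `trusted` override this global default, so individual backends can be marked untrusted.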
.. | ..
104 | 111 | char name[QUEUE_NAME_SIZE]; /* DEVNAME-qN */
105 | 112 | struct netfront_info *info;
106 | 113 |
| 114 | + struct bpf_prog __rcu *xdp_prog;
| 115 | +
107 | 116 | struct napi_struct napi;
108 | 117 |
109 | 118 | /* Split event channels support, tx_* == rx_* when using
.. | ..
145 | 154 |
146 | 155 | unsigned int rx_rsp_unconsumed;
147 | 156 | spinlock_t rx_cons_lock;
| 157 | +
| 158 | + struct page_pool *page_pool;
| 159 | + struct xdp_rxq_info xdp_rxq;
148 | 160 | };
149 | 161 |
150 | 162 | struct netfront_info {
.. | ..
160 | 172 | struct netfront_stats __percpu *rx_stats;
161 | 173 | struct netfront_stats __percpu *tx_stats;
162 | 174 |
| 175 | + /* XDP state */
| 176 | + bool netback_has_xdp_headroom;
| 177 | + bool netfront_xdp_enabled;
| 178 | +
163 | 179 | /* Is device behaving sane? */
164 | 180 | bool broken;
| 181 | +
| 182 | + /* Should skbs be bounced into a zeroed buffer? */
| 183 | + bool bounce;
165 | 184 |
166 | 185 | atomic_t rx_gso_checksum_fixup;
167 | 186 | };
.. | ..
261 | 280 | if (unlikely(!skb))
262 | 281 | return NULL;
263 | 282 |
264 | | - page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
265 | | - if (!page) {
| 283 | + page = page_pool_alloc_pages(queue->page_pool,
| 284 | + GFP_ATOMIC | __GFP_NOWARN | __GFP_ZERO);
| 285 | + if (unlikely(!page)) {
266 | 286 | kfree_skb(skb);
267 | 287 | return NULL;
268 | 288 | }
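RX buffers now come from a per-queue page pool instead of `alloc_page()`; `__GFP_ZERO` matters here because recycled pages may hold stale data from a previous life, and every RX page is granted to the backend. A minimal sketch of the allocate/recycle cycle this enables (error handling elided, `demo_*` names hypothetical):

```c
#include <net/page_pool.h>

/* Sketch: take a zeroed page from a pool and give one back for
 * recycling. The pool itself comes from page_pool_create(); the
 * driver's parameters appear in the xennet_create_page_pool() hunk
 * further down.
 */
static struct page *demo_rx_alloc(struct page_pool *pool)
{
	return page_pool_alloc_pages(pool,
				     GFP_ATOMIC | __GFP_NOWARN | __GFP_ZERO);
}

static void demo_rx_free(struct page_pool *pool, struct page *page)
{
	/* allow_direct=false: safe from any context; pass true only
	 * from the NAPI poller that owns the pool.
	 */
	page_pool_put_full_page(pool, page, false);
}
```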
.. | ..
332 | 352 | mod_timer(&queue->rx_refill_timer, jiffies + (HZ/10));
333 | 353 | return;
334 | 354 | }
335 | | -
336 | | - wmb(); /* barrier so backend seens requests */
337 | 355 |
338 | 356 | RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->rx, notify);
339 | 357 | if (notify)
.. | ..
414 | 432 | queue->tx_link[id] = TX_LINK_NONE;
415 | 433 | skb = queue->tx_skbs[id];
416 | 434 | queue->tx_skbs[id] = NULL;
417 | | - if (unlikely(gnttab_query_foreign_access(
418 | | - queue->grant_tx_ref[id]) != 0)) {
| 435 | + if (unlikely(!gnttab_end_foreign_access_ref(
| 436 | + queue->grant_tx_ref[id], GNTMAP_readonly))) {
419 | 437 | dev_alert(dev,
420 | 438 | "Grant still in use by backend domain\n");
421 | 439 | goto err;
422 | 440 | }
423 | | - gnttab_end_foreign_access_ref(
424 | | - queue->grant_tx_ref[id], GNTMAP_readonly);
425 | 441 | gnttab_release_grant_reference(
426 | 442 | &queue->gref_tx_head, queue->grant_tx_ref[id]);
427 | 443 | queue->grant_tx_ref[id] = GRANT_INVALID_REF;
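The old TX completion path was a check-then-act race: `gnttab_query_foreign_access()` could report the grant free, and the backend could map it again before `gnttab_end_foreign_access_ref()` ran. The replacement relies on `gnttab_end_foreign_access_ref()` alone, which revokes and reports success in one step. The fixed pattern in isolation, names hypothetical:

```c
#include <linux/printk.h>
#include <xen/grant_table.h>

/* Sketch: atomically revoke a read-only grant and detect a backend
 * that still maps it. On failure the ref must be treated as leaked;
 * freeing the page behind a live mapping would hand the backend
 * whatever the allocator reuses it for.
 */
static bool demo_reclaim_tx_grant(grant_ref_t ref)
{
	if (!gnttab_end_foreign_access_ref(ref, GNTMAP_readonly)) {
		pr_alert("grant %u still in use by backend domain\n", ref);
		return false;
	}
	return true;
}
```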
.. | ..
556 | 572 | for (i = 0; i < frags; i++) {
557 | 573 | skb_frag_t *frag = skb_shinfo(skb)->frags + i;
558 | 574 | unsigned long size = skb_frag_size(frag);
559 | | - unsigned long offset = frag->page_offset;
| 575 | + unsigned long offset = skb_frag_off(frag);
560 | 576 |
561 | 577 | /* Skip unused frames from start of page */
562 | 578 | offset &= ~PAGE_MASK;
.. | ..
568 | 584 | }
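`frag->page_offset` went away when `skb_frag_t` became an alias of `struct bio_vec` in v5.4; fragment fields must now be reached through accessors, which is what this and the later `skb_frag_off()`/`skb_frag_off_set()` hunks switch to. The accessor API in one place:

```c
#include <linux/skbuff.h>

/* Sketch: walk an skb's paged fragments with the accessor API
 * (skb_frag_page/skb_frag_off/skb_frag_size) rather than touching
 * skb_frag_t members, whose layout is no longer driver-visible.
 */
static unsigned int demo_frag_bytes(const struct sk_buff *skb)
{
	unsigned int i, total = 0;

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		pr_debug("frag %u: off %u len %u\n", i,
			 skb_frag_off(frag), skb_frag_size(frag));
		total += skb_frag_size(frag);
	}
	return total;
}
```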
569 | 585 |
570 | 586 | static u16 xennet_select_queue(struct net_device *dev, struct sk_buff *skb,
571 | | - struct net_device *sb_dev,
572 | | - select_queue_fallback_t fallback)
| 587 | + struct net_device *sb_dev)
573 | 588 | {
574 | 589 | unsigned int num_queues = dev->real_num_tx_queues;
575 | 590 | u32 hash;
.. | ..
591 | 606 | unsigned int i;
592 | 607 |
593 | 608 | while ((i = get_id_from_list(&queue->tx_pend_queue, queue->tx_link)) !=
594 | | - TX_LINK_NONE)
| 609 | + TX_LINK_NONE)
595 | 610 | queue->tx_link[i] = TX_PENDING;
| 611 | +}
| 612 | +
| 613 | +static int xennet_xdp_xmit_one(struct net_device *dev,
| 614 | + struct netfront_queue *queue,
| 615 | + struct xdp_frame *xdpf)
| 616 | +{
| 617 | + struct netfront_info *np = netdev_priv(dev);
| 618 | + struct netfront_stats *tx_stats = this_cpu_ptr(np->tx_stats);
| 619 | + struct xennet_gnttab_make_txreq info = {
| 620 | + .queue = queue,
| 621 | + .skb = NULL,
| 622 | + .page = virt_to_page(xdpf->data),
| 623 | + };
| 624 | + int notify;
| 625 | +
| 626 | + xennet_make_first_txreq(&info,
| 627 | + offset_in_page(xdpf->data),
| 628 | + xdpf->len);
| 629 | +
| 630 | + xennet_mark_tx_pending(queue);
| 631 | +
| 632 | + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->tx, notify);
| 633 | + if (notify)
| 634 | + notify_remote_via_irq(queue->tx_irq);
| 635 | +
| 636 | + u64_stats_update_begin(&tx_stats->syncp);
| 637 | + tx_stats->bytes += xdpf->len;
| 638 | + tx_stats->packets++;
| 639 | + u64_stats_update_end(&tx_stats->syncp);
| 640 | +
| 641 | + xennet_tx_buf_gc(queue);
| 642 | +
| 643 | + return 0;
| 644 | +}
| 645 | +
| 646 | +static int xennet_xdp_xmit(struct net_device *dev, int n,
| 647 | + struct xdp_frame **frames, u32 flags)
| 648 | +{
| 649 | + unsigned int num_queues = dev->real_num_tx_queues;
| 650 | + struct netfront_info *np = netdev_priv(dev);
| 651 | + struct netfront_queue *queue = NULL;
| 652 | + unsigned long irq_flags;
| 653 | + int drops = 0;
| 654 | + int i, err;
| 655 | +
| 656 | + if (unlikely(np->broken))
| 657 | + return -ENODEV;
| 658 | + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
| 659 | + return -EINVAL;
| 660 | +
| 661 | + queue = &np->queues[smp_processor_id() % num_queues];
| 662 | +
| 663 | + spin_lock_irqsave(&queue->tx_lock, irq_flags);
| 664 | + for (i = 0; i < n; i++) {
| 665 | + struct xdp_frame *xdpf = frames[i];
| 666 | +
| 667 | + if (!xdpf)
| 668 | + continue;
| 669 | + err = xennet_xdp_xmit_one(dev, queue, xdpf);
| 670 | + if (err) {
| 671 | + xdp_return_frame_rx_napi(xdpf);
| 672 | + drops++;
| 673 | + }
| 674 | + }
| 675 | + spin_unlock_irqrestore(&queue->tx_lock, irq_flags);
| 676 | +
| 677 | + return n - drops;
| 678 | +}
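Under this kernel's `ndo_xdp_xmit` contract the driver returns how many frames it accepted and frees the rejects itself (`xdp_return_frame_rx_napi()`); `XDP_XMIT_FLUSH` is the only valid flag. Choosing the queue by `smp_processor_id() % num_queues` keeps the tx_lock mostly CPU-local without a dedicated XDP ring. The contract reduced to a skeleton, with a hypothetical `demo_hw_tx_one()` standing in for the ring programming:

```c
#include <linux/netdevice.h>
#include <net/xdp.h>

/* Hypothetical per-frame transmit; returns 0 on success. */
static int demo_hw_tx_one(struct net_device *dev, struct xdp_frame *xdpf);

/* Sketch of the batching contract used by the hunk above: try every
 * frame, free the failures, report how many were actually queued.
 */
static int demo_xdp_xmit(struct net_device *dev, int n,
			 struct xdp_frame **frames, u32 flags)
{
	int i, sent = 0;

	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
		return -EINVAL;

	for (i = 0; i < n; i++) {
		if (demo_hw_tx_one(dev, frames[i]))
			xdp_return_frame_rx_napi(frames[i]);
		else
			sent++;
	}
	return sent;
}
```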
| 679 | +
| 680 | +struct sk_buff *bounce_skb(const struct sk_buff *skb)
| 681 | +{
| 682 | + unsigned int headerlen = skb_headroom(skb);
| 683 | + /* Align size to allocate full pages and avoid contiguous data leaks */
| 684 | + unsigned int size = ALIGN(skb_end_offset(skb) + skb->data_len,
| 685 | + XEN_PAGE_SIZE);
| 686 | + struct sk_buff *n = alloc_skb(size, GFP_ATOMIC | __GFP_ZERO);
| 687 | +
| 688 | + if (!n)
| 689 | + return NULL;
| 690 | +
| 691 | + if (!IS_ALIGNED((uintptr_t)n->head, XEN_PAGE_SIZE)) {
| 692 | + WARN_ONCE(1, "misaligned skb allocated\n");
| 693 | + kfree_skb(n);
| 694 | + return NULL;
| 695 | + }
| 696 | +
| 697 | + /* Set the data pointer */
| 698 | + skb_reserve(n, headerlen);
| 699 | + /* Set the tail pointer and length */
| 700 | + skb_put(n, skb->len);
| 701 | +
| 702 | + BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len));
| 703 | +
| 704 | + skb_copy_header(n, skb);
| 705 | + return n;
596 | 706 | }
597 | 707 |
598 | 708 | #define MAX_XEN_SKB_FRAGS (65536 / XEN_PAGE_SIZE + 1)
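The TX path grants whole pages to the backend, so an skb that shares a page with unrelated kernel data would leak that data to the other domain. `bounce_skb()` therefore copies the packet into a fresh allocation that is both zero-filled and rounded up to whole Xen pages, so a grant can only ever expose the packet plus zeroes. The size arithmetic, assuming 4 KiB Xen pages:

```c
#include <linux/kernel.h>

/* Sketch: ALIGN() rounds up to the next multiple of a power of two.
 * With a 4096-byte Xen page:
 *   ALIGN(60, 4096)   == 4096  (a tiny packet still owns a full page)
 *   ALIGN(4096, 4096) == 4096  (already aligned, unchanged)
 *   ALIGN(4097, 4096) == 8192  (spilling one byte costs a page)
 * Combined with __GFP_ZERO, every granted page belongs entirely to
 * this skb and contains nothing but the packet and zeroes.
 */
#define DEMO_XEN_PAGE_SIZE 4096u

static unsigned int demo_bounce_size(unsigned int linear_end,
				     unsigned int frag_bytes)
{
	return ALIGN(linear_end + frag_bytes, DEMO_XEN_PAGE_SIZE);
}
```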
.. | ..
647 | 757 |
648 | 758 | /* The first req should be at least ETH_HLEN size or the packet will be
649 | 759 | * dropped by netback.
| 760 | + *
| 761 | + * If the backend is not trusted bounce all data to zeroed pages to
| 762 | + * avoid exposing contiguous data on the granted page not belonging to
| 763 | + * the skb.
650 | 764 | */
651 | | - if (unlikely(PAGE_SIZE - offset < ETH_HLEN)) {
652 | | - nskb = skb_copy(skb, GFP_ATOMIC);
| 765 | + if (np->bounce || unlikely(PAGE_SIZE - offset < ETH_HLEN)) {
| 766 | + nskb = bounce_skb(skb);
653 | 767 | if (!nskb)
654 | 768 | goto drop;
655 | 769 | dev_consume_skb_any(skb);
.. | ..
716 | 830 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
717 | 831 | skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
718 | 832 | xennet_make_txreqs(&info, skb_frag_page(frag),
719 | | - frag->page_offset,
| 833 | + skb_frag_off(frag),
720 | 834 | skb_frag_size(frag));
721 | 835 | }
722 | 836 |
723 | 837 | /* First request has the packet length. */
724 | 838 | first_tx->size = skb->len;
| 839 | +
| 840 | + /* timestamp packet in software */
| 841 | + skb_tx_timestamp(skb);
725 | 842 |
726 | 843 | xennet_mark_tx_pending(queue);
727 | 844 |
.. | ..
762 | 879 | napi_disable(&queue->napi);
763 | 880 | }
764 | 881 | return 0;
| 882 | +}
| 883 | +
| 884 | +static void xennet_destroy_queues(struct netfront_info *info)
| 885 | +{
| 886 | + unsigned int i;
| 887 | +
| 888 | + for (i = 0; i < info->netdev->real_num_tx_queues; i++) {
| 889 | + struct netfront_queue *queue = &info->queues[i];
| 890 | +
| 891 | + if (netif_running(info->netdev))
| 892 | + napi_disable(&queue->napi);
| 893 | + netif_napi_del(&queue->napi);
| 894 | + }
| 895 | +
| 896 | + kfree(info->queues);
| 897 | + info->queues = NULL;
| 898 | +}
| 899 | +
| 900 | +static void xennet_uninit(struct net_device *dev)
| 901 | +{
| 902 | + struct netfront_info *np = netdev_priv(dev);
| 903 | + xennet_destroy_queues(np);
765 | 904 | }
766 | 905 |
767 | 906 | static void xennet_set_rx_rsp_cons(struct netfront_queue *queue, RING_IDX val)
.. | ..
829 | 968 | return err;
830 | 969 | }
831 | 970 |
| 971 | +static u32 xennet_run_xdp(struct netfront_queue *queue, struct page *pdata,
| 972 | + struct xen_netif_rx_response *rx, struct bpf_prog *prog,
| 973 | + struct xdp_buff *xdp, bool *need_xdp_flush)
| 974 | +{
| 975 | + struct xdp_frame *xdpf;
| 976 | + u32 len = rx->status;
| 977 | + u32 act;
| 978 | + int err;
| 979 | +
| 980 | + xdp->data_hard_start = page_address(pdata);
| 981 | + xdp->data = xdp->data_hard_start + XDP_PACKET_HEADROOM;
| 982 | + xdp_set_data_meta_invalid(xdp);
| 983 | + xdp->data_end = xdp->data + len;
| 984 | + xdp->rxq = &queue->xdp_rxq;
| 985 | + xdp->frame_sz = XEN_PAGE_SIZE - XDP_PACKET_HEADROOM;
| 986 | +
| 987 | + act = bpf_prog_run_xdp(prog, xdp);
| 988 | + switch (act) {
| 989 | + case XDP_TX:
| 990 | + get_page(pdata);
| 991 | + xdpf = xdp_convert_buff_to_frame(xdp);
| 992 | + err = xennet_xdp_xmit(queue->info->netdev, 1, &xdpf, 0);
| 993 | + if (unlikely(err < 0))
| 994 | + trace_xdp_exception(queue->info->netdev, prog, act);
| 995 | + break;
| 996 | + case XDP_REDIRECT:
| 997 | + get_page(pdata);
| 998 | + err = xdp_do_redirect(queue->info->netdev, xdp, prog);
| 999 | + *need_xdp_flush = true;
| 1000 | + if (unlikely(err))
| 1001 | + trace_xdp_exception(queue->info->netdev, prog, act);
| 1002 | + break;
| 1003 | + case XDP_PASS:
| 1004 | + case XDP_DROP:
| 1005 | + break;
| 1006 | +
| 1007 | + case XDP_ABORTED:
| 1008 | + trace_xdp_exception(queue->info->netdev, prog, act);
| 1009 | + break;
| 1010 | +
| 1011 | + default:
| 1012 | + bpf_warn_invalid_xdp_action(act);
| 1013 | + }
| 1014 | +
| 1015 | + return act;
| 1016 | +}
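`xennet_run_xdp()` wraps the single granted RX page in an `xdp_buff` (data starting at `XDP_PACKET_HEADROOM`, `frame_sz` sized to what remains of the page, which is why multi-slot frames are dropped in the caller) and dispatches on the verdict. For context, this is the kind of program the hook executes — a deliberately trivial filter that passes IPv4 and ARP and drops everything else; purely illustrative, compiled with `clang -O2 -target bpf`:

```c
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("xdp")
int demo_filter(struct xdp_md *ctx)
{
	void *data = (void *)(long)ctx->data;
	void *data_end = (void *)(long)ctx->data_end;
	struct ethhdr *eth = data;

	/* Verifier-mandated bounds check before touching the header. */
	if (data + sizeof(*eth) > data_end)
		return XDP_DROP;

	switch (bpf_ntohs(eth->h_proto)) {
	case ETH_P_IP:
	case ETH_P_ARP:
		return XDP_PASS;
	default:
		return XDP_DROP;
	}
}

char _license[] SEC("license") = "GPL";
```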
| 1017 | +
832 | 1018 | static int xennet_get_responses(struct netfront_queue *queue,
833 | 1019 | struct netfront_rx_info *rinfo, RING_IDX rp,
834 | | - struct sk_buff_head *list)
| 1020 | + struct sk_buff_head *list,
| 1021 | + bool *need_xdp_flush)
835 | 1022 | {
836 | 1023 | struct xen_netif_rx_response *rx = &rinfo->rx, rx_local;
837 | | - struct xen_netif_extra_info *extras = rinfo->extras;
838 | | - struct device *dev = &queue->info->netdev->dev;
| 1024 | + int max = XEN_NETIF_NR_SLOTS_MIN + (rx->status <= RX_COPY_THRESHOLD);
839 | 1025 | RING_IDX cons = queue->rx.rsp_cons;
840 | 1026 | struct sk_buff *skb = xennet_get_rx_skb(queue, cons);
| 1027 | + struct xen_netif_extra_info *extras = rinfo->extras;
841 | 1028 | grant_ref_t ref = xennet_get_rx_ref(queue, cons);
842 | | - int max = XEN_NETIF_NR_SLOTS_MIN + (rx->status <= RX_COPY_THRESHOLD);
| 1029 | + struct device *dev = &queue->info->netdev->dev;
| 1030 | + struct bpf_prog *xdp_prog;
| 1031 | + struct xdp_buff xdp;
843 | 1032 | int slots = 1;
844 | 1033 | int err = 0;
845 | | - unsigned long ret;
| 1034 | + u32 verdict;
846 | 1035 |
847 | 1036 | if (rx->flags & XEN_NETRXF_extra_info) {
848 | 1037 | err = xennet_get_extras(queue, extras, rp);
| 1038 | + if (!err) {
| 1039 | + if (extras[XEN_NETIF_EXTRA_TYPE_XDP - 1].type) {
| 1040 | + struct xen_netif_extra_info *xdp;
| 1041 | +
| 1042 | + xdp = &extras[XEN_NETIF_EXTRA_TYPE_XDP - 1];
| 1043 | + rx->offset = xdp->u.xdp.headroom;
| 1044 | + }
| 1045 | + }
849 | 1046 | cons = queue->rx.rsp_cons;
850 | 1047 | }
851 | 1048 |
.. | ..
873 | 1070 | goto next;
874 | 1071 | }
875 | 1072 |
876 | | - ret = gnttab_end_foreign_access_ref(ref, 0);
877 | | - BUG_ON(!ret);
| 1073 | + if (!gnttab_end_foreign_access_ref(ref, 0)) {
| 1074 | + dev_alert(dev,
| 1075 | + "Grant still in use by backend domain\n");
| 1076 | + queue->info->broken = true;
| 1077 | + dev_alert(dev, "Disabled for further use\n");
| 1078 | + return -EINVAL;
| 1079 | + }
878 | 1080 |
879 | 1081 | gnttab_release_grant_reference(&queue->gref_rx_head, ref);
| 1082 | +
| 1083 | + rcu_read_lock();
| 1084 | + xdp_prog = rcu_dereference(queue->xdp_prog);
| 1085 | + if (xdp_prog) {
| 1086 | + if (!(rx->flags & XEN_NETRXF_more_data)) {
| 1087 | + /* currently only a single page contains data */
| 1088 | + verdict = xennet_run_xdp(queue,
| 1089 | + skb_frag_page(&skb_shinfo(skb)->frags[0]),
| 1090 | + rx, xdp_prog, &xdp, need_xdp_flush);
| 1091 | + if (verdict != XDP_PASS)
| 1092 | + err = -EINVAL;
| 1093 | + } else {
| 1094 | + /* drop the frame */
| 1095 | + err = -EINVAL;
| 1096 | + }
| 1097 | + }
| 1098 | + rcu_read_unlock();
880 | 1099 |
881 | 1100 | __skb_queue_tail(list, skb);
882 | 1101 |
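The RX fast path dereferences `queue->xdp_prog` under `rcu_read_lock()` while `xennet_xdp_set()` (below) publishes a new program with `rcu_assign_pointer()` under RTNL; readers see either the old or the new program, never a torn pointer, and `bpf_prog_put()` on the old one is safe once its references drain. The reader side in isolation, names hypothetical:

```c
#include <linux/filter.h>
#include <linux/rcupdate.h>

/* Sketch: RCU-protected read of a BPF program pointer, as done per
 * packet in xennet_get_responses(). "slot" is the __rcu pointer the
 * setter publishes with rcu_assign_pointer().
 */
static u32 demo_run_prog(struct bpf_prog __rcu **slot, struct xdp_buff *xdp)
{
	struct bpf_prog *prog;
	u32 act = XDP_PASS;

	rcu_read_lock();
	prog = rcu_dereference(*slot);	/* NULL when no program is loaded */
	if (prog)
		act = bpf_prog_run_xdp(prog, xdp);
	rcu_read_unlock();

	return act;
}
```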
.. | ..
1052 | 1271 | struct sk_buff_head errq;
1053 | 1272 | struct sk_buff_head tmpq;
1054 | 1273 | int err;
| 1274 | + bool need_xdp_flush = false;
1055 | 1275 |
1056 | 1276 | spin_lock(&queue->rx_lock);
1057 | 1277 |
.. | ..
1075 | 1295 | RING_COPY_RESPONSE(&queue->rx, i, rx);
1076 | 1296 | memset(extras, 0, sizeof(rinfo.extras));
1077 | 1297 |
1078 | | - err = xennet_get_responses(queue, &rinfo, rp, &tmpq);
| 1298 | + err = xennet_get_responses(queue, &rinfo, rp, &tmpq,
| 1299 | + &need_xdp_flush);
1079 | 1300 |
1080 | 1301 | if (unlikely(err)) {
| 1302 | + if (queue->info->broken) {
| 1303 | + spin_unlock(&queue->rx_lock);
| 1304 | + return 0;
| 1305 | + }
1081 | 1306 | err:
1082 | 1307 | while ((skb = __skb_dequeue(&tmpq)))
1083 | 1308 | __skb_queue_tail(&errq, skb);
.. | ..
1105 | 1330 | if (NETFRONT_SKB_CB(skb)->pull_to > RX_COPY_THRESHOLD)
1106 | 1331 | NETFRONT_SKB_CB(skb)->pull_to = RX_COPY_THRESHOLD;
1107 | 1332 |
1108 | | - skb_shinfo(skb)->frags[0].page_offset = rx->offset;
| 1333 | + skb_frag_off_set(&skb_shinfo(skb)->frags[0], rx->offset);
1109 | 1334 | skb_frag_size_set(&skb_shinfo(skb)->frags[0], rx->status);
1110 | 1335 | skb->data_len = rx->status;
1111 | 1336 | skb->len += rx->status;
.. | ..
1124 | 1349 | xennet_set_rx_rsp_cons(queue, i);
1125 | 1350 | work_done++;
1126 | 1351 | }
| 1352 | + if (need_xdp_flush)
| 1353 | + xdp_do_flush();
1127 | 1354 |
1128 | 1355 | __skb_queue_purge(&errq);
1129 | 1356 |
.. | ..
1375 | 1602 | }
1376 | 1603 | #endif
1377 | 1604 |
| 1605 | +#define NETBACK_XDP_HEADROOM_DISABLE 0
| 1606 | +#define NETBACK_XDP_HEADROOM_ENABLE 1
| 1607 | +
| 1608 | +static int talk_to_netback_xdp(struct netfront_info *np, int xdp)
| 1609 | +{
| 1610 | + int err;
| 1611 | + unsigned short headroom;
| 1612 | +
| 1613 | + headroom = xdp ? XDP_PACKET_HEADROOM : 0;
| 1614 | + err = xenbus_printf(XBT_NIL, np->xbdev->nodename,
| 1615 | + "xdp-headroom", "%hu",
| 1616 | + headroom);
| 1617 | + if (err)
| 1618 | + pr_warn("Error writing xdp-headroom\n");
| 1619 | +
| 1620 | + return err;
| 1621 | +}
| 1622 | +
| 1623 | +static int xennet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
| 1624 | + struct netlink_ext_ack *extack)
| 1625 | +{
| 1626 | + unsigned long max_mtu = XEN_PAGE_SIZE - XDP_PACKET_HEADROOM;
| 1627 | + struct netfront_info *np = netdev_priv(dev);
| 1628 | + struct bpf_prog *old_prog;
| 1629 | + unsigned int i, err;
| 1630 | +
| 1631 | + if (dev->mtu > max_mtu) {
| 1632 | + netdev_warn(dev, "XDP requires MTU less than %lu\n", max_mtu);
| 1633 | + return -EINVAL;
| 1634 | + }
| 1635 | +
| 1636 | + if (!np->netback_has_xdp_headroom)
| 1637 | + return 0;
| 1638 | +
| 1639 | + xenbus_switch_state(np->xbdev, XenbusStateReconfiguring);
| 1640 | +
| 1641 | + err = talk_to_netback_xdp(np, prog ? NETBACK_XDP_HEADROOM_ENABLE :
| 1642 | + NETBACK_XDP_HEADROOM_DISABLE);
| 1643 | + if (err)
| 1644 | + return err;
| 1645 | +
| 1646 | + /* avoid the race with XDP headroom adjustment */
| 1647 | + wait_event(module_wq,
| 1648 | + xenbus_read_driver_state(np->xbdev->otherend) ==
| 1649 | + XenbusStateReconfigured);
| 1650 | + np->netfront_xdp_enabled = true;
| 1651 | +
| 1652 | + old_prog = rtnl_dereference(np->queues[0].xdp_prog);
| 1653 | +
| 1654 | + if (prog)
| 1655 | + bpf_prog_add(prog, dev->real_num_tx_queues);
| 1656 | +
| 1657 | + for (i = 0; i < dev->real_num_tx_queues; ++i)
| 1658 | + rcu_assign_pointer(np->queues[i].xdp_prog, prog);
| 1659 | +
| 1660 | + if (old_prog)
| 1661 | + for (i = 0; i < dev->real_num_tx_queues; ++i)
| 1662 | + bpf_prog_put(old_prog);
| 1663 | +
| 1664 | + xenbus_switch_state(np->xbdev, XenbusStateConnected);
| 1665 | +
| 1666 | + return 0;
| 1667 | +}
| 1668 | +
| 1669 | +static int xennet_xdp(struct net_device *dev, struct netdev_bpf *xdp)
| 1670 | +{
| 1671 | + struct netfront_info *np = netdev_priv(dev);
| 1672 | +
| 1673 | + if (np->broken)
| 1674 | + return -ENODEV;
| 1675 | +
| 1676 | + switch (xdp->command) {
| 1677 | + case XDP_SETUP_PROG:
| 1678 | + return xennet_xdp_set(dev, xdp->prog, xdp->extack);
| 1679 | + default:
| 1680 | + return -EINVAL;
| 1681 | + }
| 1682 | +}
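A program reaches the driver through `ndo_bpf`/`XDP_SETUP_PROG`; before publishing it to the queues, xen-netfront renegotiates `xdp-headroom` over xenstore, parking the device in `XenbusStateReconfiguring` until the backend acknowledges with `Reconfigured`, so headroom never changes under live rings. From userspace the whole sequence is triggered by an ordinary XDP attach. A hedged sketch using libbpf ≥ 1.0 conventions (`bpf_xdp_attach()` appeared in libbpf 0.8); the object file, program name, and interface name are hypothetical:

```c
#include <bpf/libbpf.h>
#include <net/if.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical object built from the XDP program sketched earlier. */
	struct bpf_object *obj = bpf_object__open_file("demo_filter.o", NULL);
	struct bpf_program *prog;
	int ifindex = if_nametoindex("eth0");	/* the xen-netfront vif */

	if (!obj || bpf_object__load(obj) || !ifindex)
		return 1;

	prog = bpf_object__find_program_by_name(obj, "demo_filter");
	if (!prog)
		return 1;

	/* flags 0: let the kernel prefer native XDP, which this patch
	 * makes available on xen-netfront (subject to the MTU check).
	 */
	if (bpf_xdp_attach(ifindex, bpf_program__fd(prog), 0, NULL)) {
		fprintf(stderr, "attach failed\n");
		return 1;
	}
	return 0;
}
```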
| 1683 | +
1378 | 1684 | static const struct net_device_ops xennet_netdev_ops = {
| 1685 | + .ndo_uninit = xennet_uninit,
1379 | 1686 | .ndo_open = xennet_open,
1380 | 1687 | .ndo_stop = xennet_close,
1381 | 1688 | .ndo_start_xmit = xennet_start_xmit,
.. | ..
1386 | 1693 | .ndo_fix_features = xennet_fix_features,
1387 | 1694 | .ndo_set_features = xennet_set_features,
1388 | 1695 | .ndo_select_queue = xennet_select_queue,
| 1696 | + .ndo_bpf = xennet_xdp,
| 1697 | + .ndo_xdp_xmit = xennet_xdp_xmit,
1389 | 1698 | #ifdef CONFIG_NET_POLL_CONTROLLER
1390 | 1699 | .ndo_poll_controller = xennet_poll_controller,
1391 | 1700 | #endif
.. | ..
1445 | 1754 | SET_NETDEV_DEV(netdev, &dev->dev);
1446 | 1755 |
1447 | 1756 | np->netdev = netdev;
| 1757 | + np->netfront_xdp_enabled = false;
1448 | 1758 |
1449 | 1759 | netif_carrier_off(netdev);
1450 | 1760 |
.. | ..
1536 | 1846 | queue->rx_ring_ref = GRANT_INVALID_REF;
1537 | 1847 | queue->tx.sring = NULL;
1538 | 1848 | queue->rx.sring = NULL;
| 1849 | +
| 1850 | + page_pool_destroy(queue->page_pool);
1539 | 1851 | }
1540 | 1852 | }
1541 | 1853 |
.. | ..
1556 | 1868 | netif_tx_unlock_bh(info->netdev);
1557 | 1869 |
1558 | 1870 | xennet_disconnect_backend(info);
| 1871 | +
| 1872 | + rtnl_lock();
| 1873 | + if (info->queues)
| 1874 | + xennet_destroy_queues(info);
| 1875 | + rtnl_unlock();
| 1876 | +
1559 | 1877 | return 0;
1560 | 1878 | }
1561 | 1879 |
.. | ..
1655 | 1973 | struct netfront_queue *queue, unsigned int feature_split_evtchn)
1656 | 1974 | {
1657 | 1975 | struct xen_netif_tx_sring *txs;
1658 | | - struct xen_netif_rx_sring *rxs;
| 1976 | + struct xen_netif_rx_sring *rxs = NULL;
1659 | 1977 | grant_ref_t gref;
1660 | 1978 | int err;
1661 | 1979 |
.. | ..
1675 | 1993 |
1676 | 1994 | err = xenbus_grant_ring(dev, txs, 1, &gref);
1677 | 1995 | if (err < 0)
1678 | | - goto grant_tx_ring_fail;
| 1996 | + goto fail;
1679 | 1997 | queue->tx_ring_ref = gref;
1680 | 1998 |
1681 | 1999 | rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
1682 | 2000 | if (!rxs) {
1683 | 2001 | err = -ENOMEM;
1684 | 2002 | xenbus_dev_fatal(dev, err, "allocating rx ring page");
1685 | | - goto alloc_rx_ring_fail;
| 2003 | + goto fail;
1686 | 2004 | }
1687 | 2005 | SHARED_RING_INIT(rxs);
1688 | 2006 | FRONT_RING_INIT(&queue->rx, rxs, XEN_PAGE_SIZE);
1689 | 2007 |
1690 | 2008 | err = xenbus_grant_ring(dev, rxs, 1, &gref);
1691 | 2009 | if (err < 0)
1692 | | - goto grant_rx_ring_fail;
| 2010 | + goto fail;
1693 | 2011 | queue->rx_ring_ref = gref;
1694 | 2012 |
1695 | 2013 | if (feature_split_evtchn)
.. | ..
1702 | 2020 | err = setup_netfront_single(queue);
1703 | 2021 |
1704 | 2022 | if (err)
1705 | | - goto alloc_evtchn_fail;
| 2023 | + goto fail;
1706 | 2024 |
1707 | 2025 | return 0;
1708 | 2026 |
1709 | 2027 | /* If we fail to setup netfront, it is safe to just revoke access to
1710 | 2028 | * granted pages because backend is not accessing it at this point.
1711 | 2029 | */
1712 | | -alloc_evtchn_fail:
1713 | | - gnttab_end_foreign_access_ref(queue->rx_ring_ref, 0);
1714 | | -grant_rx_ring_fail:
1715 | | - free_page((unsigned long)rxs);
1716 | | -alloc_rx_ring_fail:
1717 | | - gnttab_end_foreign_access_ref(queue->tx_ring_ref, 0);
1718 | | -grant_tx_ring_fail:
1719 | | - free_page((unsigned long)txs);
1720 | | -fail:
| 2030 | + fail:
| 2031 | + if (queue->rx_ring_ref != GRANT_INVALID_REF) {
| 2032 | + gnttab_end_foreign_access(queue->rx_ring_ref, 0,
| 2033 | + (unsigned long)rxs);
| 2034 | + queue->rx_ring_ref = GRANT_INVALID_REF;
| 2035 | + } else {
| 2036 | + free_page((unsigned long)rxs);
| 2037 | + }
| 2038 | + if (queue->tx_ring_ref != GRANT_INVALID_REF) {
| 2039 | + gnttab_end_foreign_access(queue->tx_ring_ref, 0,
| 2040 | + (unsigned long)txs);
| 2041 | + queue->tx_ring_ref = GRANT_INVALID_REF;
| 2042 | + } else {
| 2043 | + free_page((unsigned long)txs);
| 2044 | + }
1721 | 2045 | return err;
1722 | 2046 | }
1723 | 2047 |
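The old error unwind used one label per failure point and, crucially, ended foreign access and then freed the ring page unconditionally; if the backend still held a mapping, the freed page could be reused by the allocator while another domain could still write it. The rewrite funnels every failure into a single `fail:` label and distinguishes "granted" from "merely allocated" by checking the ref, using `gnttab_end_foreign_access()`, which only releases the page once the grant is truly gone. That decision in isolation (`GRANT_INVALID_REF` is the driver's local sentinel, mirrored here as a demo define):

```c
#include <linux/gfp.h>
#include <xen/grant_table.h>

#define DEMO_GRANT_INVALID_REF 0	/* driver-local "never granted" */

/* Sketch: tear down a shared ring page that may or may not have been
 * granted. gnttab_end_foreign_access() revokes the grant and frees
 * the page, deferring the free if the backend still maps it; a page
 * that was never granted can go straight back to the allocator.
 */
static void demo_teardown_ring(grant_ref_t ref, unsigned long page)
{
	if (ref != DEMO_GRANT_INVALID_REF)
		gnttab_end_foreign_access(ref, 0, page);
	else
		free_page(page);
}
```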
.. | ..
1863 | 2187 | return err;
1864 | 2188 | }
1865 | 2189 |
1866 | | -static void xennet_destroy_queues(struct netfront_info *info)
| 2190 | +
| 2191 | +
| 2192 | +static int xennet_create_page_pool(struct netfront_queue *queue)
1867 | 2193 | {
1868 | | - unsigned int i;
| 2194 | + int err;
| 2195 | + struct page_pool_params pp_params = {
| 2196 | + .order = 0,
| 2197 | + .flags = 0,
| 2198 | + .pool_size = NET_RX_RING_SIZE,
| 2199 | + .nid = NUMA_NO_NODE,
| 2200 | + .dev = &queue->info->netdev->dev,
| 2201 | + .offset = XDP_PACKET_HEADROOM,
| 2202 | + .max_len = XEN_PAGE_SIZE - XDP_PACKET_HEADROOM,
| 2203 | + };
1869 | 2204 |
1870 | | - for (i = 0; i < info->netdev->real_num_tx_queues; i++) {
1871 | | - struct netfront_queue *queue = &info->queues[i];
1872 | | -
1873 | | - if (netif_running(info->netdev))
1874 | | - napi_disable(&queue->napi);
1875 | | - netif_napi_del(&queue->napi);
| 2205 | + queue->page_pool = page_pool_create(&pp_params);
| 2206 | + if (IS_ERR(queue->page_pool)) {
| 2207 | + err = PTR_ERR(queue->page_pool);
| 2208 | + queue->page_pool = NULL;
| 2209 | + return err;
1876 | 2210 | }
1877 | 2211 |
1878 | | - kfree(info->queues);
1879 | | - info->queues = NULL;
| 2212 | + err = xdp_rxq_info_reg(&queue->xdp_rxq, queue->info->netdev,
| 2213 | + queue->id);
| 2214 | + if (err) {
| 2215 | + netdev_err(queue->info->netdev, "xdp_rxq_info_reg failed\n");
| 2216 | + goto err_free_pp;
| 2217 | + }
| 2218 | +
| 2219 | + err = xdp_rxq_info_reg_mem_model(&queue->xdp_rxq,
| 2220 | + MEM_TYPE_PAGE_POOL, queue->page_pool);
| 2221 | + if (err) {
| 2222 | + netdev_err(queue->info->netdev, "xdp_rxq_info_reg_mem_model failed\n");
| 2223 | + goto err_unregister_rxq;
| 2224 | + }
| 2225 | + return 0;
| 2226 | +
| 2227 | +err_unregister_rxq:
| 2228 | + xdp_rxq_info_unreg(&queue->xdp_rxq);
| 2229 | +err_free_pp:
| 2230 | + page_pool_destroy(queue->page_pool);
| 2231 | + queue->page_pool = NULL;
| 2232 | + return err;
1880 | 2233 | }
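Each queue binds its `xdp_rxq_info` to the pool with `MEM_TYPE_PAGE_POOL`, which is what lets `xdp_do_redirect()` targets return buffers through `xdp_return_frame()` into the pool instead of the page allocator. Teardown should mirror registration, as the error path above already does: drop the rxq registration (and with it the memory-model binding) before destroying the pool. A sketch of that counterpart, assuming the struct fields added earlier in this patch:

```c
#include <net/page_pool.h>
#include <net/xdp.h>

/* Sketch: inverse of xennet_create_page_pool(). Unregister the rxq
 * first so the MEM_TYPE_PAGE_POOL binding is gone before the pool
 * itself is torn down; page_pool_destroy() waits for in-flight pages.
 */
static void demo_destroy_rx_resources(struct xdp_rxq_info *xdp_rxq,
				      struct page_pool **pool)
{
	if (xdp_rxq_info_is_reg(xdp_rxq))
		xdp_rxq_info_unreg(xdp_rxq);

	page_pool_destroy(*pool);	/* tolerates NULL */
	*pool = NULL;
}
```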
1881 | 2234 |
1882 | 2235 | static int xennet_create_queues(struct netfront_info *info,
.. | ..
1902 | 2255 | "only created %d queues\n", i);
1903 | 2256 | *num_queues = i;
1904 | 2257 | break;
| 2258 | + }
| 2259 | +
| 2260 | + /* use page pool recycling instead of buddy allocator */
| 2261 | + ret = xennet_create_page_pool(queue);
| 2262 | + if (ret < 0) {
| 2263 | + dev_err(&info->xbdev->dev, "can't allocate page pool\n");
| 2264 | + *num_queues = i;
| 2265 | + return ret;
1905 | 2266 | }
1906 | 2267 |
1907 | 2268 | netif_napi_add(queue->info->netdev, &queue->napi,
.. | ..
1934 | 2295 |
1935 | 2296 | info->netdev->irq = 0;
1936 | 2297 |
| 2298 | + /* Check if backend is trusted. */
| 2299 | + info->bounce = !xennet_trusted ||
| 2300 | + !xenbus_read_unsigned(dev->nodename, "trusted", 1);
| 2301 | +
1937 | 2302 | /* Check if backend supports multiple queues */
1938 | 2303 | max_queues = xenbus_read_unsigned(info->xbdev->otherend,
1939 | 2304 | "multi-queue-max-queues", 1);
.. | ..
1948 | 2313 | if (err) {
1949 | 2314 | xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
1950 | 2315 | goto out_unlocked;
| 2316 | + }
| 2317 | +
| 2318 | + info->netback_has_xdp_headroom = xenbus_read_unsigned(info->xbdev->otherend,
| 2319 | + "feature-xdp-headroom", 0);
| 2320 | + if (info->netback_has_xdp_headroom) {
| 2321 | + /* set the current xen-netfront xdp state */
| 2322 | + err = talk_to_netback_xdp(info, info->netfront_xdp_enabled ?
| 2323 | + NETBACK_XDP_HEADROOM_ENABLE :
| 2324 | + NETBACK_XDP_HEADROOM_DISABLE);
| 2325 | + if (err)
| 2326 | + goto out_unlocked;
1951 | 2327 | }
1952 | 2328 |
1953 | 2329 | rtnl_lock();
.. | ..
2087 | 2463 | err = talk_to_netback(np->xbdev, np);
2088 | 2464 | if (err)
2089 | 2465 | return err;
| 2466 | + if (np->netback_has_xdp_headroom)
| 2467 | + pr_info("backend supports XDP headroom\n");
| 2468 | + if (np->bounce)
| 2469 | + dev_info(&np->xbdev->dev,
| 2470 | + "bouncing transmitted data to zeroed pages\n");
2090 | 2471 |
2091 | 2472 | /* talk_to_netback() sets the correct number of queues */
2092 | 2473 | num_queues = dev->real_num_tx_queues;
.. | ..
2170 | 2551 | case XenbusStateClosed:
2171 | 2552 | if (dev->state == XenbusStateClosed)
2172 | 2553 | break;
2173 | | - /* Missed the backend's CLOSING state -- fallthrough */
| 2554 | + fallthrough; /* Missed the backend's CLOSING state */
2174 | 2555 | case XenbusStateClosing:
2175 | 2556 | xenbus_frontend_closed(dev);
2176 | 2557 | break;
.. | ..
2227 | 2608 | .get_sset_count = xennet_get_sset_count,
2228 | 2609 | .get_ethtool_stats = xennet_get_ethtool_stats,
2229 | 2610 | .get_strings = xennet_get_strings,
| 2611 | + .get_ts_info = ethtool_op_get_ts_info,
2230 | 2612 | };
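The `skb_tx_timestamp()` call in the xmit hunk earlier and this `get_ts_info` hook are a pair: the first emits software TX timestamps, the second advertises them. `ethtool_op_get_ts_info` is the stock helper that reports `SOF_TIMESTAMPING_TX_SOFTWARE | SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE` and no PHC. Userspace can confirm with `ethtool -T` or via the ioctl directly; a hedged sketch of the latter, interface name hypothetical:

```c
#include <linux/ethtool.h>
#include <linux/sockios.h>
#include <net/if.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct ethtool_ts_info info = { .cmd = ETHTOOL_GET_TS_INFO };
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;
	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);	/* the vif */
	ifr.ifr_data = (char *)&info;

	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0) {
		perror("SIOCETHTOOL");
		close(fd);
		return 1;
	}
	/* Expect the three SOF_TIMESTAMPING_*SOFTWARE bits; phc_index -1. */
	printf("so_timestamping: 0x%x, phc_index: %d\n",
	       info.so_timestamping, info.phc_index);
	close(fd);
	return 0;
}
```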
2231 | 2613 |
2232 | 2614 | #ifdef CONFIG_SYSFS