| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-or-later |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * TUN - Universal TUN/TAP device driver. |
|---|
| 3 | 4 | * Copyright (C) 1999-2002 Maxim Krasnyansky <maxk@qualcomm.com> |
|---|
| 4 | | - * |
|---|
| 5 | | - * This program is free software; you can redistribute it and/or modify |
|---|
| 6 | | - * it under the terms of the GNU General Public License as published by |
|---|
| 7 | | - * the Free Software Foundation; either version 2 of the License, or |
|---|
| 8 | | - * (at your option) any later version. |
|---|
| 9 | | - * |
|---|
| 10 | | - * This program is distributed in the hope that it will be useful, |
|---|
| 11 | | - * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 12 | | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|---|
| 13 | | - * GNU General Public License for more details. |
|---|
| 14 | 5 | * |
|---|
| 15 | 6 | * $Id: tun.c,v 1.15 2002/03/01 02:44:24 maxk Exp $ |
|---|
| 16 | 7 | */ |
|---|
| .. | .. |
|---|
| 71 | 62 | #include <net/rtnetlink.h> |
|---|
| 72 | 63 | #include <net/sock.h> |
|---|
| 73 | 64 | #include <net/xdp.h> |
|---|
| 65 | +#include <net/ip_tunnels.h> |
|---|
| 74 | 66 | #include <linux/seq_file.h> |
|---|
| 75 | 67 | #include <linux/uio.h> |
|---|
| 76 | 68 | #include <linux/skb_array.h> |
|---|
| .. | .. |
|---|
| 92 | 84 | static void tun_default_link_ksettings(struct net_device *dev, |
|---|
| 93 | 85 | struct ethtool_link_ksettings *cmd); |
|---|
| 94 | 86 | |
|---|
| 95 | | -/* Uncomment to enable debugging */ |
|---|
| 96 | | -/* #define TUN_DEBUG 1 */ |
|---|
| 97 | | - |
|---|
| 98 | | -#ifdef TUN_DEBUG |
|---|
| 99 | | -static int debug; |
|---|
| 100 | | - |
|---|
| 101 | | -#define tun_debug(level, tun, fmt, args...) \ |
|---|
| 102 | | -do { \ |
|---|
| 103 | | - if (tun->debug) \ |
|---|
| 104 | | - netdev_printk(level, tun->dev, fmt, ##args); \ |
|---|
| 105 | | -} while (0) |
|---|
| 106 | | -#define DBG1(level, fmt, args...) \ |
|---|
| 107 | | -do { \ |
|---|
| 108 | | - if (debug == 2) \ |
|---|
| 109 | | - printk(level fmt, ##args); \ |
|---|
| 110 | | -} while (0) |
|---|
| 111 | | -#else |
|---|
| 112 | | -#define tun_debug(level, tun, fmt, args...) \ |
|---|
| 113 | | -do { \ |
|---|
| 114 | | - if (0) \ |
|---|
| 115 | | - netdev_printk(level, tun->dev, fmt, ##args); \ |
|---|
| 116 | | -} while (0) |
|---|
| 117 | | -#define DBG1(level, fmt, args...) \ |
|---|
| 118 | | -do { \ |
|---|
| 119 | | - if (0) \ |
|---|
| 120 | | - printk(level fmt, ##args); \ |
|---|
| 121 | | -} while (0) |
|---|
| 122 | | -#endif |
|---|
| 123 | | - |
|---|
| 124 | | -#define TUN_HEADROOM 256 |
|---|
| 125 | 87 | #define TUN_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD) |
|---|
| 126 | 88 | |
|---|
| 127 | 89 | /* TUN device flags */ |
|---|
| .. | .. |
|---|
| 154 | 116 | #define TUN_FLOW_EXPIRE (3 * HZ) |
|---|
| 155 | 117 | |
|---|
| 156 | 118 | struct tun_pcpu_stats { |
|---|
| 157 | | - u64 rx_packets; |
|---|
| 158 | | - u64 rx_bytes; |
|---|
| 159 | | - u64 tx_packets; |
|---|
| 160 | | - u64 tx_bytes; |
|---|
| 119 | + u64_stats_t rx_packets; |
|---|
| 120 | + u64_stats_t rx_bytes; |
|---|
| 121 | + u64_stats_t tx_packets; |
|---|
| 122 | + u64_stats_t tx_bytes; |
|---|
| 161 | 123 | struct u64_stats_sync syncp; |
|---|
| 162 | 124 | u32 rx_dropped; |
|---|
| 163 | 125 | u32 tx_dropped; |
|---|
| .. | .. |
|---|
| 178 | 140 | struct tun_file { |
|---|
| 179 | 141 | struct sock sk; |
|---|
| 180 | 142 | struct socket socket; |
|---|
| 181 | | - struct socket_wq wq; |
|---|
| 182 | 143 | struct tun_struct __rcu *tun; |
|---|
| 183 | 144 | struct fasync_struct *fasync; |
|---|
| 184 | 145 | /* only used for fasnyc */ |
|---|
| .. | .. |
|---|
| 197 | 158 | struct xdp_rxq_info xdp_rxq; |
|---|
| 198 | 159 | }; |
|---|
| 199 | 160 | |
|---|
| 161 | +struct tun_page { |
|---|
| 162 | + struct page *page; |
|---|
| 163 | + int count; |
|---|
| 164 | +}; |
|---|
| 165 | + |
|---|
| 200 | 166 | struct tun_flow_entry { |
|---|
| 201 | 167 | struct hlist_node hash_link; |
|---|
| 202 | 168 | struct rcu_head rcu; |
|---|
| .. | .. |
|---|
| 205 | 171 | u32 rxhash; |
|---|
| 206 | 172 | u32 rps_rxhash; |
|---|
| 207 | 173 | int queue_index; |
|---|
| 208 | | - unsigned long updated; |
|---|
| 174 | + unsigned long updated ____cacheline_aligned_in_smp; |
|---|
| 209 | 175 | }; |
|---|
| 210 | 176 | |
|---|
| 211 | 177 | #define TUN_NUM_FLOW_ENTRIES 1024 |
|---|
| .. | .. |
|---|
| 239 | 205 | struct sock_fprog fprog; |
|---|
| 240 | 206 | /* protected by rtnl lock */ |
|---|
| 241 | 207 | bool filter_attached; |
|---|
| 242 | | -#ifdef TUN_DEBUG |
|---|
| 243 | | - int debug; |
|---|
| 244 | | -#endif |
|---|
| 208 | + u32 msg_enable; |
|---|
| 245 | 209 | spinlock_t lock; |
|---|
| 246 | 210 | struct hlist_head flows[TUN_NUM_FLOW_ENTRIES]; |
|---|
| 247 | 211 | struct timer_list flow_gc_timer; |
|---|
| .. | .. |
|---|
| 256 | 220 | struct tun_prog __rcu *steering_prog; |
|---|
| 257 | 221 | struct tun_prog __rcu *filter_prog; |
|---|
| 258 | 222 | struct ethtool_link_ksettings link_ksettings; |
|---|
| 223 | + /* init args */ |
|---|
| 224 | + struct file *file; |
|---|
| 225 | + struct ifreq *ifr; |
|---|
| 259 | 226 | }; |
|---|
| 260 | 227 | |
|---|
| 261 | 228 | struct veth { |
|---|
| .. | .. |
|---|
| 263 | 230 | __be16 h_vlan_TCI; |
|---|
| 264 | 231 | }; |
|---|
| 265 | 232 | |
|---|
| 266 | | -bool tun_is_xdp_frame(void *ptr) |
|---|
| 267 | | -{ |
|---|
| 268 | | - return (unsigned long)ptr & TUN_XDP_FLAG; |
|---|
| 269 | | -} |
|---|
| 270 | | -EXPORT_SYMBOL(tun_is_xdp_frame); |
|---|
| 271 | | - |
|---|
| 272 | | -void *tun_xdp_to_ptr(void *ptr) |
|---|
| 273 | | -{ |
|---|
| 274 | | - return (void *)((unsigned long)ptr | TUN_XDP_FLAG); |
|---|
| 275 | | -} |
|---|
| 276 | | -EXPORT_SYMBOL(tun_xdp_to_ptr); |
|---|
| 277 | | - |
|---|
| 278 | | -void *tun_ptr_to_xdp(void *ptr) |
|---|
| 279 | | -{ |
|---|
| 280 | | - return (void *)((unsigned long)ptr & ~TUN_XDP_FLAG); |
|---|
| 281 | | -} |
|---|
| 282 | | -EXPORT_SYMBOL(tun_ptr_to_xdp); |
|---|
| 233 | +static void tun_flow_init(struct tun_struct *tun); |
|---|
| 234 | +static void tun_flow_uninit(struct tun_struct *tun); |
|---|
| 283 | 235 | |
|---|
| 284 | 236 | static int tun_napi_receive(struct napi_struct *napi, int budget) |
|---|
| 285 | 237 | { |
|---|
| .. | .. |
|---|
| 331 | 283 | NAPI_POLL_WEIGHT); |
|---|
| 332 | 284 | napi_enable(&tfile->napi); |
|---|
| 333 | 285 | } |
|---|
| 286 | +} |
|---|
| 287 | + |
|---|
| 288 | +static void tun_napi_enable(struct tun_file *tfile) |
|---|
| 289 | +{ |
|---|
| 290 | + if (tfile->napi_enabled) |
|---|
| 291 | + napi_enable(&tfile->napi); |
|---|
| 334 | 292 | } |
|---|
| 335 | 293 | |
|---|
| 336 | 294 | static void tun_napi_disable(struct tun_file *tfile) |
|---|
| .. | .. |
|---|
| 437 | 395 | struct tun_flow_entry *e = kmalloc(sizeof(*e), GFP_ATOMIC); |
|---|
| 438 | 396 | |
|---|
| 439 | 397 | if (e) { |
|---|
| 440 | | - tun_debug(KERN_INFO, tun, "create flow: hash %u index %u\n", |
|---|
| 441 | | - rxhash, queue_index); |
|---|
| 398 | + netif_info(tun, tx_queued, tun->dev, |
|---|
| 399 | + "create flow: hash %u index %u\n", |
|---|
| 400 | + rxhash, queue_index); |
|---|
| 442 | 401 | e->updated = jiffies; |
|---|
| 443 | 402 | e->rxhash = rxhash; |
|---|
| 444 | 403 | e->rps_rxhash = 0; |
|---|
| .. | .. |
|---|
| 452 | 411 | |
|---|
| 453 | 412 | static void tun_flow_delete(struct tun_struct *tun, struct tun_flow_entry *e) |
|---|
| 454 | 413 | { |
|---|
| 455 | | - tun_debug(KERN_INFO, tun, "delete flow: hash %u index %u\n", |
|---|
| 456 | | - e->rxhash, e->queue_index); |
|---|
| 414 | + netif_info(tun, tx_queued, tun->dev, "delete flow: hash %u index %u\n", |
|---|
| 415 | + e->rxhash, e->queue_index); |
|---|
| 457 | 416 | hlist_del_rcu(&e->hash_link); |
|---|
| 458 | 417 | kfree_rcu(e, rcu); |
|---|
| 459 | 418 | --tun->flow_count; |
|---|
| .. | .. |
|---|
| 499 | 458 | unsigned long count = 0; |
|---|
| 500 | 459 | int i; |
|---|
| 501 | 460 | |
|---|
| 502 | | - tun_debug(KERN_INFO, tun, "tun_flow_cleanup\n"); |
|---|
| 503 | | - |
|---|
| 504 | 461 | spin_lock(&tun->lock); |
|---|
| 505 | 462 | for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) { |
|---|
| 506 | 463 | struct tun_flow_entry *e; |
|---|
| .. | .. |
|---|
| 533 | 490 | unsigned long delay = tun->ageing_time; |
|---|
| 534 | 491 | u16 queue_index = tfile->queue_index; |
|---|
| 535 | 492 | |
|---|
| 536 | | - if (!rxhash) |
|---|
| 537 | | - return; |
|---|
| 538 | | - else |
|---|
| 539 | | - head = &tun->flows[tun_hashfn(rxhash)]; |
|---|
| 493 | + head = &tun->flows[tun_hashfn(rxhash)]; |
|---|
| 540 | 494 | |
|---|
| 541 | 495 | rcu_read_lock(); |
|---|
| 542 | 496 | |
|---|
| 543 | 497 | e = tun_flow_find(head, rxhash); |
|---|
| 544 | 498 | if (likely(e)) { |
|---|
| 545 | 499 | /* TODO: keep queueing to old queue until it's empty? */ |
|---|
| 546 | | - e->queue_index = queue_index; |
|---|
| 547 | | - e->updated = jiffies; |
|---|
| 500 | + if (READ_ONCE(e->queue_index) != queue_index) |
|---|
| 501 | + WRITE_ONCE(e->queue_index, queue_index); |
|---|
| 502 | + if (e->updated != jiffies) |
|---|
| 503 | + e->updated = jiffies; |
|---|
| 548 | 504 | sock_rps_record_flow_hash(e->rps_rxhash); |
|---|
| 549 | 505 | } else { |
|---|
| 550 | 506 | spin_lock_bh(&tun->lock); |
|---|
| .. | .. |
|---|
| 561 | 517 | rcu_read_unlock(); |
|---|
| 562 | 518 | } |
|---|
| 563 | 519 | |
|---|
| 564 | | -/** |
|---|
| 565 | | - * Save the hash received in the stack receive path and update the |
|---|
| 520 | +/* Save the hash received in the stack receive path and update the |
|---|
| 566 | 521 | * flow_hash table accordingly. |
|---|
| 567 | 522 | */ |
|---|
| 568 | 523 | static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash) |
|---|
| .. | .. |
|---|
| 571 | 526 | e->rps_rxhash = hash; |
|---|
| 572 | 527 | } |
|---|
| 573 | 528 | |
|---|
| 574 | | -/* We try to identify a flow through its rxhash first. The reason that |
|---|
| 529 | +/* We try to identify a flow through its rxhash. The reason that |
|---|
| 575 | 530 | * we do not check rxq no. is because some cards(e.g 82599), chooses |
|---|
| 576 | 531 | * the rxq based on the txq where the last packet of the flow comes. As |
|---|
| 577 | 532 | * the userspace application move between processors, we may get a |
|---|
| 578 | | - * different rxq no. here. If we could not get rxhash, then we would |
|---|
| 579 | | - * hope the rxq no. may help here. |
|---|
| 533 | + * different rxq no. here. |
|---|
| 580 | 534 | */ |
|---|
| 581 | 535 | static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb) |
|---|
| 582 | 536 | { |
|---|
| .. | .. |
|---|
| 587 | 541 | numqueues = READ_ONCE(tun->numqueues); |
|---|
| 588 | 542 | |
|---|
| 589 | 543 | txq = __skb_get_hash_symmetric(skb); |
|---|
| 590 | | - if (txq) { |
|---|
| 591 | | - e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq); |
|---|
| 592 | | - if (e) { |
|---|
| 593 | | - tun_flow_save_rps_rxhash(e, txq); |
|---|
| 594 | | - txq = e->queue_index; |
|---|
| 595 | | - } else |
|---|
| 596 | | - /* use multiply and shift instead of expensive divide */ |
|---|
| 597 | | - txq = ((u64)txq * numqueues) >> 32; |
|---|
| 598 | | - } else if (likely(skb_rx_queue_recorded(skb))) { |
|---|
| 599 | | - txq = skb_get_rx_queue(skb); |
|---|
| 600 | | - while (unlikely(txq >= numqueues)) |
|---|
| 601 | | - txq -= numqueues; |
|---|
| 544 | + e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq); |
|---|
| 545 | + if (e) { |
|---|
| 546 | + tun_flow_save_rps_rxhash(e, txq); |
|---|
| 547 | + txq = e->queue_index; |
|---|
| 548 | + } else { |
|---|
| 549 | + /* use multiply and shift instead of expensive divide */ |
|---|
| 550 | + txq = ((u64)txq * numqueues) >> 32; |
|---|
| 602 | 551 | } |
|---|
| 603 | 552 | |
|---|
| 604 | 553 | return txq; |
|---|
| .. | .. |
|---|
| 622 | 571 | } |
|---|
| 623 | 572 | |
|---|
| 624 | 573 | static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb, |
|---|
| 625 | | - struct net_device *sb_dev, |
|---|
| 626 | | - select_queue_fallback_t fallback) |
|---|
| 574 | + struct net_device *sb_dev) |
|---|
| 627 | 575 | { |
|---|
| 628 | 576 | struct tun_struct *tun = netdev_priv(dev); |
|---|
| 629 | 577 | u16 ret; |
|---|
| .. | .. |
|---|
| 704 | 652 | tun = rtnl_dereference(tfile->tun); |
|---|
| 705 | 653 | |
|---|
| 706 | 654 | if (tun && clean) { |
|---|
| 707 | | - tun_napi_disable(tfile); |
|---|
| 655 | + if (!tfile->detached) |
|---|
| 656 | + tun_napi_disable(tfile); |
|---|
| 708 | 657 | tun_napi_del(tfile); |
|---|
| 709 | 658 | } |
|---|
| 710 | 659 | |
|---|
| .. | .. |
|---|
| 723 | 672 | if (clean) { |
|---|
| 724 | 673 | RCU_INIT_POINTER(tfile->tun, NULL); |
|---|
| 725 | 674 | sock_put(&tfile->sk); |
|---|
| 726 | | - } else |
|---|
| 675 | + } else { |
|---|
| 727 | 676 | tun_disable_queue(tun, tfile); |
|---|
| 677 | + tun_napi_disable(tfile); |
|---|
| 678 | + } |
|---|
| 728 | 679 | |
|---|
| 729 | 680 | synchronize_net(); |
|---|
| 730 | 681 | tun_flow_delete_by_queue(tun, tun->numqueues + 1); |
|---|
| .. | .. |
|---|
| 747 | 698 | if (tun) |
|---|
| 748 | 699 | xdp_rxq_info_unreg(&tfile->xdp_rxq); |
|---|
| 749 | 700 | ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free); |
|---|
| 750 | | - sock_put(&tfile->sk); |
|---|
| 751 | 701 | } |
|---|
| 752 | 702 | } |
|---|
| 753 | 703 | |
|---|
| .. | .. |
|---|
| 763 | 713 | if (dev) |
|---|
| 764 | 714 | netdev_state_change(dev); |
|---|
| 765 | 715 | rtnl_unlock(); |
|---|
| 716 | + |
|---|
| 717 | + if (clean) |
|---|
| 718 | + sock_put(&tfile->sk); |
|---|
| 766 | 719 | } |
|---|
| 767 | 720 | |
|---|
| 768 | 721 | static void tun_detach_all(struct net_device *dev) |
|---|
| .. | .. |
|---|
| 797 | 750 | sock_put(&tfile->sk); |
|---|
| 798 | 751 | } |
|---|
| 799 | 752 | list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) { |
|---|
| 753 | + tun_napi_del(tfile); |
|---|
| 800 | 754 | tun_enable_queue(tfile); |
|---|
| 801 | 755 | tun_queue_purge(tfile); |
|---|
| 802 | 756 | xdp_rxq_info_unreg(&tfile->xdp_rxq); |
|---|
| .. | .. |
|---|
| 877 | 831 | |
|---|
| 878 | 832 | if (tfile->detached) { |
|---|
| 879 | 833 | tun_enable_queue(tfile); |
|---|
| 834 | + tun_napi_enable(tfile); |
|---|
| 880 | 835 | } else { |
|---|
| 881 | 836 | sock_hold(&tfile->sk); |
|---|
| 882 | 837 | tun_napi_init(tun, tfile, napi, napi_frags); |
|---|
| 883 | 838 | } |
|---|
| 839 | + |
|---|
| 840 | + if (rtnl_dereference(tun->xdp_prog)) |
|---|
| 841 | + sock_set_flag(&tfile->sk, SOCK_XDP); |
|---|
| 884 | 842 | |
|---|
| 885 | 843 | /* device is allowed to go away first, so no need to hold extra |
|---|
| 886 | 844 | * refcnt. |
|---|
| .. | .. |
|---|
| 1025 | 983 | |
|---|
| 1026 | 984 | static const struct ethtool_ops tun_ethtool_ops; |
|---|
| 1027 | 985 | |
|---|
| 986 | +static int tun_net_init(struct net_device *dev) |
|---|
| 987 | +{ |
|---|
| 988 | + struct tun_struct *tun = netdev_priv(dev); |
|---|
| 989 | + struct ifreq *ifr = tun->ifr; |
|---|
| 990 | + int err; |
|---|
| 991 | + |
|---|
| 992 | + tun->pcpu_stats = netdev_alloc_pcpu_stats(struct tun_pcpu_stats); |
|---|
| 993 | + if (!tun->pcpu_stats) |
|---|
| 994 | + return -ENOMEM; |
|---|
| 995 | + |
|---|
| 996 | + spin_lock_init(&tun->lock); |
|---|
| 997 | + |
|---|
| 998 | + err = security_tun_dev_alloc_security(&tun->security); |
|---|
| 999 | + if (err < 0) { |
|---|
| 1000 | + free_percpu(tun->pcpu_stats); |
|---|
| 1001 | + return err; |
|---|
| 1002 | + } |
|---|
| 1003 | + |
|---|
| 1004 | + tun_flow_init(tun); |
|---|
| 1005 | + |
|---|
| 1006 | + dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | |
|---|
| 1007 | + TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX | |
|---|
| 1008 | + NETIF_F_HW_VLAN_STAG_TX; |
|---|
| 1009 | + dev->features = dev->hw_features | NETIF_F_LLTX; |
|---|
| 1010 | + dev->vlan_features = dev->features & |
|---|
| 1011 | + ~(NETIF_F_HW_VLAN_CTAG_TX | |
|---|
| 1012 | + NETIF_F_HW_VLAN_STAG_TX); |
|---|
| 1013 | + |
|---|
| 1014 | + tun->flags = (tun->flags & ~TUN_FEATURES) | |
|---|
| 1015 | + (ifr->ifr_flags & TUN_FEATURES); |
|---|
| 1016 | + |
|---|
| 1017 | + INIT_LIST_HEAD(&tun->disabled); |
|---|
| 1018 | + err = tun_attach(tun, tun->file, false, ifr->ifr_flags & IFF_NAPI, |
|---|
| 1019 | + ifr->ifr_flags & IFF_NAPI_FRAGS, false); |
|---|
| 1020 | + if (err < 0) { |
|---|
| 1021 | + tun_flow_uninit(tun); |
|---|
| 1022 | + security_tun_dev_free_security(tun->security); |
|---|
| 1023 | + free_percpu(tun->pcpu_stats); |
|---|
| 1024 | + return err; |
|---|
| 1025 | + } |
|---|
| 1026 | + return 0; |
|---|
| 1027 | +} |
|---|
| 1028 | + |
|---|
| 1028 | 1029 | /* Net device detach from fd. */ |
|---|
| 1029 | 1030 | static void tun_net_uninit(struct net_device *dev) |
|---|
| 1030 | 1031 | { |
|---|
| .. | .. |
|---|
| 1050 | 1051 | static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb) |
|---|
| 1051 | 1052 | { |
|---|
| 1052 | 1053 | #ifdef CONFIG_RPS |
|---|
| 1053 | | - if (tun->numqueues == 1 && static_key_false(&rps_needed)) { |
|---|
| 1054 | + if (tun->numqueues == 1 && static_branch_unlikely(&rps_needed)) { |
|---|
| 1054 | 1055 | /* Select queue was not called for the skbuff, so we extract the |
|---|
| 1055 | 1056 | * RPS hash and save it into the flow_table here. |
|---|
| 1056 | 1057 | */ |
|---|
| 1058 | + struct tun_flow_entry *e; |
|---|
| 1057 | 1059 | __u32 rxhash; |
|---|
| 1058 | 1060 | |
|---|
| 1059 | 1061 | rxhash = __skb_get_hash_symmetric(skb); |
|---|
| 1060 | | - if (rxhash) { |
|---|
| 1061 | | - struct tun_flow_entry *e; |
|---|
| 1062 | | - e = tun_flow_find(&tun->flows[tun_hashfn(rxhash)], |
|---|
| 1063 | | - rxhash); |
|---|
| 1064 | | - if (e) |
|---|
| 1065 | | - tun_flow_save_rps_rxhash(e, rxhash); |
|---|
| 1066 | | - } |
|---|
| 1062 | + e = tun_flow_find(&tun->flows[tun_hashfn(rxhash)], rxhash); |
|---|
| 1063 | + if (e) |
|---|
| 1064 | + tun_flow_save_rps_rxhash(e, rxhash); |
|---|
| 1067 | 1065 | } |
|---|
| 1068 | 1066 | #endif |
|---|
| 1069 | 1067 | } |
|---|
| .. | .. |
|---|
| 1099 | 1097 | if (!rcu_dereference(tun->steering_prog)) |
|---|
| 1100 | 1098 | tun_automq_xmit(tun, skb); |
|---|
| 1101 | 1099 | |
|---|
| 1102 | | - tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len); |
|---|
| 1103 | | - |
|---|
| 1104 | | - BUG_ON(!tfile); |
|---|
| 1100 | + netif_info(tun, tx_queued, tun->dev, "%s %d\n", __func__, skb->len); |
|---|
| 1105 | 1101 | |
|---|
| 1106 | 1102 | /* Drop if the filter does not like it. |
|---|
| 1107 | 1103 | * This is a noop if the filter is disabled. |
|---|
| .. | .. |
|---|
| 1127 | 1123 | */ |
|---|
| 1128 | 1124 | skb_orphan(skb); |
|---|
| 1129 | 1125 | |
|---|
| 1130 | | - nf_reset(skb); |
|---|
| 1126 | + nf_reset_ct(skb); |
|---|
| 1131 | 1127 | |
|---|
| 1132 | 1128 | if (ptr_ring_produce(&tfile->tx_ring, skb)) |
|---|
| 1133 | 1129 | goto drop; |
|---|
| .. | .. |
|---|
| 1194 | 1190 | p = per_cpu_ptr(tun->pcpu_stats, i); |
|---|
| 1195 | 1191 | do { |
|---|
| 1196 | 1192 | start = u64_stats_fetch_begin(&p->syncp); |
|---|
| 1197 | | - rxpackets = p->rx_packets; |
|---|
| 1198 | | - rxbytes = p->rx_bytes; |
|---|
| 1199 | | - txpackets = p->tx_packets; |
|---|
| 1200 | | - txbytes = p->tx_bytes; |
|---|
| 1193 | + rxpackets = u64_stats_read(&p->rx_packets); |
|---|
| 1194 | + rxbytes = u64_stats_read(&p->rx_bytes); |
|---|
| 1195 | + txpackets = u64_stats_read(&p->tx_packets); |
|---|
| 1196 | + txbytes = u64_stats_read(&p->tx_bytes); |
|---|
| 1201 | 1197 | } while (u64_stats_fetch_retry(&p->syncp, start)); |
|---|
| 1202 | 1198 | |
|---|
| 1203 | 1199 | stats->rx_packets += rxpackets; |
|---|
| .. | .. |
|---|
| 1219 | 1215 | struct netlink_ext_ack *extack) |
|---|
| 1220 | 1216 | { |
|---|
| 1221 | 1217 | struct tun_struct *tun = netdev_priv(dev); |
|---|
| 1218 | + struct tun_file *tfile; |
|---|
| 1222 | 1219 | struct bpf_prog *old_prog; |
|---|
| 1220 | + int i; |
|---|
| 1223 | 1221 | |
|---|
| 1224 | 1222 | old_prog = rtnl_dereference(tun->xdp_prog); |
|---|
| 1225 | 1223 | rcu_assign_pointer(tun->xdp_prog, prog); |
|---|
| 1226 | 1224 | if (old_prog) |
|---|
| 1227 | 1225 | bpf_prog_put(old_prog); |
|---|
| 1228 | 1226 | |
|---|
| 1229 | | - return 0; |
|---|
| 1230 | | -} |
|---|
| 1231 | | - |
|---|
| 1232 | | -static u32 tun_xdp_query(struct net_device *dev) |
|---|
| 1233 | | -{ |
|---|
| 1234 | | - struct tun_struct *tun = netdev_priv(dev); |
|---|
| 1235 | | - const struct bpf_prog *xdp_prog; |
|---|
| 1236 | | - |
|---|
| 1237 | | - xdp_prog = rtnl_dereference(tun->xdp_prog); |
|---|
| 1238 | | - if (xdp_prog) |
|---|
| 1239 | | - return xdp_prog->aux->id; |
|---|
| 1227 | + for (i = 0; i < tun->numqueues; i++) { |
|---|
| 1228 | + tfile = rtnl_dereference(tun->tfiles[i]); |
|---|
| 1229 | + if (prog) |
|---|
| 1230 | + sock_set_flag(&tfile->sk, SOCK_XDP); |
|---|
| 1231 | + else |
|---|
| 1232 | + sock_reset_flag(&tfile->sk, SOCK_XDP); |
|---|
| 1233 | + } |
|---|
| 1234 | + list_for_each_entry(tfile, &tun->disabled, next) { |
|---|
| 1235 | + if (prog) |
|---|
| 1236 | + sock_set_flag(&tfile->sk, SOCK_XDP); |
|---|
| 1237 | + else |
|---|
| 1238 | + sock_reset_flag(&tfile->sk, SOCK_XDP); |
|---|
| 1239 | + } |
|---|
| 1240 | 1240 | |
|---|
| 1241 | 1241 | return 0; |
|---|
| 1242 | 1242 | } |
|---|
| .. | .. |
|---|
| 1246 | 1246 | switch (xdp->command) { |
|---|
| 1247 | 1247 | case XDP_SETUP_PROG: |
|---|
| 1248 | 1248 | return tun_xdp_set(dev, xdp->prog, xdp->extack); |
|---|
| 1249 | | - case XDP_QUERY_PROG: |
|---|
| 1250 | | - xdp->prog_id = tun_xdp_query(dev); |
|---|
| 1251 | | - return 0; |
|---|
| 1252 | 1249 | default: |
|---|
| 1253 | 1250 | return -EINVAL; |
|---|
| 1254 | 1251 | } |
|---|
| 1255 | 1252 | } |
|---|
| 1256 | 1253 | |
|---|
| 1254 | +static int tun_net_change_carrier(struct net_device *dev, bool new_carrier) |
|---|
| 1255 | +{ |
|---|
| 1256 | + if (new_carrier) { |
|---|
| 1257 | + struct tun_struct *tun = netdev_priv(dev); |
|---|
| 1258 | + |
|---|
| 1259 | + if (!tun->numqueues) |
|---|
| 1260 | + return -EPERM; |
|---|
| 1261 | + |
|---|
| 1262 | + netif_carrier_on(dev); |
|---|
| 1263 | + } else { |
|---|
| 1264 | + netif_carrier_off(dev); |
|---|
| 1265 | + } |
|---|
| 1266 | + return 0; |
|---|
| 1267 | +} |
|---|
| 1268 | + |
|---|
| 1257 | 1269 | static const struct net_device_ops tun_netdev_ops = { |
|---|
| 1270 | + .ndo_init = tun_net_init, |
|---|
| 1258 | 1271 | .ndo_uninit = tun_net_uninit, |
|---|
| 1259 | 1272 | .ndo_open = tun_net_open, |
|---|
| 1260 | 1273 | .ndo_stop = tun_net_close, |
|---|
| .. | .. |
|---|
| 1263 | 1276 | .ndo_select_queue = tun_select_queue, |
|---|
| 1264 | 1277 | .ndo_set_rx_headroom = tun_set_headroom, |
|---|
| 1265 | 1278 | .ndo_get_stats64 = tun_net_get_stats64, |
|---|
| 1279 | + .ndo_change_carrier = tun_net_change_carrier, |
|---|
| 1266 | 1280 | }; |
|---|
| 1267 | 1281 | |
|---|
| 1268 | 1282 | static void __tun_xdp_flush_tfile(struct tun_file *tfile) |
|---|
| .. | .. |
|---|
| 1325 | 1339 | |
|---|
| 1326 | 1340 | static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp) |
|---|
| 1327 | 1341 | { |
|---|
| 1328 | | - struct xdp_frame *frame = convert_to_xdp_frame(xdp); |
|---|
| 1342 | + struct xdp_frame *frame = xdp_convert_buff_to_frame(xdp); |
|---|
| 1329 | 1343 | |
|---|
| 1330 | 1344 | if (unlikely(!frame)) |
|---|
| 1331 | 1345 | return -EOVERFLOW; |
|---|
| .. | .. |
|---|
| 1334 | 1348 | } |
|---|
| 1335 | 1349 | |
|---|
| 1336 | 1350 | static const struct net_device_ops tap_netdev_ops = { |
|---|
| 1351 | + .ndo_init = tun_net_init, |
|---|
| 1337 | 1352 | .ndo_uninit = tun_net_uninit, |
|---|
| 1338 | 1353 | .ndo_open = tun_net_open, |
|---|
| 1339 | 1354 | .ndo_stop = tun_net_close, |
|---|
| .. | .. |
|---|
| 1348 | 1363 | .ndo_get_stats64 = tun_net_get_stats64, |
|---|
| 1349 | 1364 | .ndo_bpf = tun_xdp, |
|---|
| 1350 | 1365 | .ndo_xdp_xmit = tun_xdp_xmit, |
|---|
| 1366 | + .ndo_change_carrier = tun_net_change_carrier, |
|---|
| 1351 | 1367 | }; |
|---|
| 1352 | 1368 | |
|---|
| 1353 | 1369 | static void tun_flow_init(struct tun_struct *tun) |
|---|
| .. | .. |
|---|
| 1373 | 1389 | #define MAX_MTU 65535 |
|---|
| 1374 | 1390 | |
|---|
| 1375 | 1391 | /* Initialize net device. */ |
|---|
| 1376 | | -static void tun_net_init(struct net_device *dev) |
|---|
| 1392 | +static void tun_net_initialize(struct net_device *dev) |
|---|
| 1377 | 1393 | { |
|---|
| 1378 | 1394 | struct tun_struct *tun = netdev_priv(dev); |
|---|
| 1379 | 1395 | |
|---|
| 1380 | 1396 | switch (tun->flags & TUN_TYPE_MASK) { |
|---|
| 1381 | 1397 | case IFF_TUN: |
|---|
| 1382 | 1398 | dev->netdev_ops = &tun_netdev_ops; |
|---|
| 1399 | + dev->header_ops = &ip_tunnel_header_ops; |
|---|
| 1383 | 1400 | |
|---|
| 1384 | 1401 | /* Point-to-Point TUN Device */ |
|---|
| 1385 | 1402 | dev->hard_header_len = 0; |
|---|
| .. | .. |
|---|
| 1429 | 1446 | |
|---|
| 1430 | 1447 | sk = tfile->socket.sk; |
|---|
| 1431 | 1448 | |
|---|
| 1432 | | - tun_debug(KERN_INFO, tun, "tun_chr_poll\n"); |
|---|
| 1433 | | - |
|---|
| 1434 | 1449 | poll_wait(file, sk_sleep(sk), wait); |
|---|
| 1435 | 1450 | |
|---|
| 1436 | 1451 | if (!ptr_ring_empty(&tfile->tx_ring)) |
|---|
| .. | .. |
|---|
| 1462 | 1477 | int err; |
|---|
| 1463 | 1478 | int i; |
|---|
| 1464 | 1479 | |
|---|
| 1465 | | - if (it->nr_segs > MAX_SKB_FRAGS + 1) |
|---|
| 1480 | + if (it->nr_segs > MAX_SKB_FRAGS + 1 || |
|---|
| 1481 | + len > (ETH_MAX_MTU - NET_SKB_PAD - NET_IP_ALIGN)) |
|---|
| 1466 | 1482 | return ERR_PTR(-EMSGSIZE); |
|---|
| 1467 | 1483 | |
|---|
| 1468 | 1484 | local_bh_disable(); |
|---|
| .. | .. |
|---|
| 1481 | 1497 | skb->truesize += skb->data_len; |
|---|
| 1482 | 1498 | |
|---|
| 1483 | 1499 | for (i = 1; i < it->nr_segs; i++) { |
|---|
| 1484 | | - struct page_frag *pfrag = &current->task_frag; |
|---|
| 1485 | 1500 | size_t fragsz = it->iov[i].iov_len; |
|---|
| 1501 | + struct page *page; |
|---|
| 1502 | + void *frag; |
|---|
| 1486 | 1503 | |
|---|
| 1487 | 1504 | if (fragsz == 0 || fragsz > PAGE_SIZE) { |
|---|
| 1488 | 1505 | err = -EINVAL; |
|---|
| 1489 | 1506 | goto free; |
|---|
| 1490 | 1507 | } |
|---|
| 1491 | | - |
|---|
| 1492 | | - if (!skb_page_frag_refill(fragsz, pfrag, GFP_KERNEL)) { |
|---|
| 1508 | + frag = netdev_alloc_frag(fragsz); |
|---|
| 1509 | + if (!frag) { |
|---|
| 1493 | 1510 | err = -ENOMEM; |
|---|
| 1494 | 1511 | goto free; |
|---|
| 1495 | 1512 | } |
|---|
| 1496 | | - |
|---|
| 1497 | | - skb_fill_page_desc(skb, i - 1, pfrag->page, |
|---|
| 1498 | | - pfrag->offset, fragsz); |
|---|
| 1499 | | - page_ref_inc(pfrag->page); |
|---|
| 1500 | | - pfrag->offset += fragsz; |
|---|
| 1513 | + page = virt_to_head_page(frag); |
|---|
| 1514 | + skb_fill_page_desc(skb, i - 1, page, |
|---|
| 1515 | + frag - page_address(page), fragsz); |
|---|
| 1501 | 1516 | } |
|---|
| 1502 | 1517 | |
|---|
| 1503 | 1518 | return skb; |
|---|
| .. | .. |
|---|
| 1589 | 1604 | if (zerocopy) |
|---|
| 1590 | 1605 | return false; |
|---|
| 1591 | 1606 | |
|---|
| 1592 | | - if (SKB_DATA_ALIGN(len + TUN_RX_PAD) + |
|---|
| 1607 | + if (SKB_DATA_ALIGN(len + TUN_RX_PAD + XDP_PACKET_HEADROOM) + |
|---|
| 1593 | 1608 | SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) > PAGE_SIZE) |
|---|
| 1594 | 1609 | return false; |
|---|
| 1595 | 1610 | |
|---|
| 1596 | 1611 | return true; |
|---|
| 1612 | +} |
|---|
| 1613 | + |
|---|
| 1614 | +static struct sk_buff *__tun_build_skb(struct tun_file *tfile, |
|---|
| 1615 | + struct page_frag *alloc_frag, char *buf, |
|---|
| 1616 | + int buflen, int len, int pad) |
|---|
| 1617 | +{ |
|---|
| 1618 | + struct sk_buff *skb = build_skb(buf, buflen); |
|---|
| 1619 | + |
|---|
| 1620 | + if (!skb) |
|---|
| 1621 | + return ERR_PTR(-ENOMEM); |
|---|
| 1622 | + |
|---|
| 1623 | + skb_reserve(skb, pad); |
|---|
| 1624 | + skb_put(skb, len); |
|---|
| 1625 | + skb_set_owner_w(skb, tfile->socket.sk); |
|---|
| 1626 | + |
|---|
| 1627 | + get_page(alloc_frag->page); |
|---|
| 1628 | + alloc_frag->offset += buflen; |
|---|
| 1629 | + |
|---|
| 1630 | + return skb; |
|---|
| 1631 | +} |
|---|
| 1632 | + |
|---|
| 1633 | +static int tun_xdp_act(struct tun_struct *tun, struct bpf_prog *xdp_prog, |
|---|
| 1634 | + struct xdp_buff *xdp, u32 act) |
|---|
| 1635 | +{ |
|---|
| 1636 | + int err; |
|---|
| 1637 | + |
|---|
| 1638 | + switch (act) { |
|---|
| 1639 | + case XDP_REDIRECT: |
|---|
| 1640 | + err = xdp_do_redirect(tun->dev, xdp, xdp_prog); |
|---|
| 1641 | + if (err) |
|---|
| 1642 | + return err; |
|---|
| 1643 | + break; |
|---|
| 1644 | + case XDP_TX: |
|---|
| 1645 | + err = tun_xdp_tx(tun->dev, xdp); |
|---|
| 1646 | + if (err < 0) |
|---|
| 1647 | + return err; |
|---|
| 1648 | + break; |
|---|
| 1649 | + case XDP_PASS: |
|---|
| 1650 | + break; |
|---|
| 1651 | + default: |
|---|
| 1652 | + bpf_warn_invalid_xdp_action(act); |
|---|
| 1653 | + fallthrough; |
|---|
| 1654 | + case XDP_ABORTED: |
|---|
| 1655 | + trace_xdp_exception(tun->dev, xdp_prog, act); |
|---|
| 1656 | + fallthrough; |
|---|
| 1657 | + case XDP_DROP: |
|---|
| 1658 | + this_cpu_inc(tun->pcpu_stats->rx_dropped); |
|---|
| 1659 | + break; |
|---|
| 1660 | + } |
|---|
| 1661 | + |
|---|
| 1662 | + return act; |
|---|
| 1597 | 1663 | } |
|---|
| 1598 | 1664 | |
|---|
| 1599 | 1665 | static struct sk_buff *tun_build_skb(struct tun_struct *tun, |
|---|
| .. | .. |
|---|
| 1603 | 1669 | int len, int *skb_xdp) |
|---|
| 1604 | 1670 | { |
|---|
| 1605 | 1671 | struct page_frag *alloc_frag = &current->task_frag; |
|---|
| 1606 | | - struct sk_buff *skb; |
|---|
| 1607 | 1672 | struct bpf_prog *xdp_prog; |
|---|
| 1608 | 1673 | int buflen = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); |
|---|
| 1609 | | - unsigned int delta = 0; |
|---|
| 1610 | 1674 | char *buf; |
|---|
| 1611 | 1675 | size_t copied; |
|---|
| 1612 | | - int err, pad = TUN_RX_PAD; |
|---|
| 1676 | + int pad = TUN_RX_PAD; |
|---|
| 1677 | + int err = 0; |
|---|
| 1613 | 1678 | |
|---|
| 1614 | 1679 | rcu_read_lock(); |
|---|
| 1615 | 1680 | xdp_prog = rcu_dereference(tun->xdp_prog); |
|---|
| 1616 | 1681 | if (xdp_prog) |
|---|
| 1617 | | - pad += TUN_HEADROOM; |
|---|
| 1682 | + pad += XDP_PACKET_HEADROOM; |
|---|
| 1618 | 1683 | buflen += SKB_DATA_ALIGN(len + pad); |
|---|
| 1619 | 1684 | rcu_read_unlock(); |
|---|
| 1620 | 1685 | |
|---|
| .. | .. |
|---|
| 1633 | 1698 | * of xdp_prog above, this should be rare and for simplicity |
|---|
| 1634 | 1699 | * we do XDP on skb in case the headroom is not enough. |
|---|
| 1635 | 1700 | */ |
|---|
| 1636 | | - if (hdr->gso_type || !xdp_prog) |
|---|
| 1701 | + if (hdr->gso_type || !xdp_prog) { |
|---|
| 1637 | 1702 | *skb_xdp = 1; |
|---|
| 1638 | | - else |
|---|
| 1639 | | - *skb_xdp = 0; |
|---|
| 1703 | + return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, |
|---|
| 1704 | + pad); |
|---|
| 1705 | + } |
|---|
| 1706 | + |
|---|
| 1707 | + *skb_xdp = 0; |
|---|
| 1640 | 1708 | |
|---|
| 1641 | 1709 | local_bh_disable(); |
|---|
| 1642 | 1710 | rcu_read_lock(); |
|---|
| 1643 | 1711 | xdp_prog = rcu_dereference(tun->xdp_prog); |
|---|
| 1644 | | - if (xdp_prog && !*skb_xdp) { |
|---|
| 1712 | + if (xdp_prog) { |
|---|
| 1645 | 1713 | struct xdp_buff xdp; |
|---|
| 1646 | | - void *orig_data; |
|---|
| 1647 | 1714 | u32 act; |
|---|
| 1648 | 1715 | |
|---|
| 1649 | 1716 | xdp.data_hard_start = buf; |
|---|
| .. | .. |
|---|
| 1651 | 1718 | xdp_set_data_meta_invalid(&xdp); |
|---|
| 1652 | 1719 | xdp.data_end = xdp.data + len; |
|---|
| 1653 | 1720 | xdp.rxq = &tfile->xdp_rxq; |
|---|
| 1654 | | - orig_data = xdp.data; |
|---|
| 1721 | + xdp.frame_sz = buflen; |
|---|
| 1722 | + |
|---|
| 1655 | 1723 | act = bpf_prog_run_xdp(xdp_prog, &xdp); |
|---|
| 1656 | | - |
|---|
| 1657 | | - switch (act) { |
|---|
| 1658 | | - case XDP_REDIRECT: |
|---|
| 1724 | + if (act == XDP_REDIRECT || act == XDP_TX) { |
|---|
| 1659 | 1725 | get_page(alloc_frag->page); |
|---|
| 1660 | 1726 | alloc_frag->offset += buflen; |
|---|
| 1661 | | - err = xdp_do_redirect(tun->dev, &xdp, xdp_prog); |
|---|
| 1662 | | - xdp_do_flush_map(); |
|---|
| 1663 | | - if (err) |
|---|
| 1664 | | - goto err_redirect; |
|---|
| 1665 | | - rcu_read_unlock(); |
|---|
| 1666 | | - local_bh_enable(); |
|---|
| 1667 | | - return NULL; |
|---|
| 1668 | | - case XDP_TX: |
|---|
| 1669 | | - get_page(alloc_frag->page); |
|---|
| 1670 | | - alloc_frag->offset += buflen; |
|---|
| 1671 | | - if (tun_xdp_tx(tun->dev, &xdp) < 0) |
|---|
| 1672 | | - goto err_redirect; |
|---|
| 1673 | | - rcu_read_unlock(); |
|---|
| 1674 | | - local_bh_enable(); |
|---|
| 1675 | | - return NULL; |
|---|
| 1676 | | - case XDP_PASS: |
|---|
| 1677 | | - delta = orig_data - xdp.data; |
|---|
| 1678 | | - len = xdp.data_end - xdp.data; |
|---|
| 1679 | | - break; |
|---|
| 1680 | | - default: |
|---|
| 1681 | | - bpf_warn_invalid_xdp_action(act); |
|---|
| 1682 | | - /* fall through */ |
|---|
| 1683 | | - case XDP_ABORTED: |
|---|
| 1684 | | - trace_xdp_exception(tun->dev, xdp_prog, act); |
|---|
| 1685 | | - /* fall through */ |
|---|
| 1686 | | - case XDP_DROP: |
|---|
| 1687 | | - goto err_xdp; |
|---|
| 1688 | 1727 | } |
|---|
| 1728 | + err = tun_xdp_act(tun, xdp_prog, &xdp, act); |
|---|
| 1729 | + if (err < 0) { |
|---|
| 1730 | + if (act == XDP_REDIRECT || act == XDP_TX) |
|---|
| 1731 | + put_page(alloc_frag->page); |
|---|
| 1732 | + goto out; |
|---|
| 1733 | + } |
|---|
| 1734 | + |
|---|
| 1735 | + if (err == XDP_REDIRECT) |
|---|
| 1736 | + xdp_do_flush(); |
|---|
| 1737 | + if (err != XDP_PASS) |
|---|
| 1738 | + goto out; |
|---|
| 1739 | + |
|---|
| 1740 | + pad = xdp.data - xdp.data_hard_start; |
|---|
| 1741 | + len = xdp.data_end - xdp.data; |
|---|
| 1689 | 1742 | } |
|---|
| 1690 | | - |
|---|
| 1691 | | - skb = build_skb(buf, buflen); |
|---|
| 1692 | | - if (!skb) { |
|---|
| 1693 | | - rcu_read_unlock(); |
|---|
| 1694 | | - local_bh_enable(); |
|---|
| 1695 | | - return ERR_PTR(-ENOMEM); |
|---|
| 1696 | | - } |
|---|
| 1697 | | - |
|---|
| 1698 | | - skb_reserve(skb, pad - delta); |
|---|
| 1699 | | - skb_put(skb, len); |
|---|
| 1700 | | - skb_set_owner_w(skb, tfile->socket.sk); |
|---|
| 1701 | | - get_page(alloc_frag->page); |
|---|
| 1702 | | - alloc_frag->offset += buflen; |
|---|
| 1703 | | - |
|---|
| 1704 | 1743 | rcu_read_unlock(); |
|---|
| 1705 | 1744 | local_bh_enable(); |
|---|
| 1706 | 1745 | |
|---|
| 1707 | | - return skb; |
|---|
| 1746 | + return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, pad); |
|---|
| 1708 | 1747 | |
|---|
| 1709 | | -err_redirect: |
|---|
| 1710 | | - put_page(alloc_frag->page); |
|---|
| 1711 | | -err_xdp: |
|---|
| 1748 | +out: |
|---|
| 1712 | 1749 | rcu_read_unlock(); |
|---|
| 1713 | 1750 | local_bh_enable(); |
|---|
| 1714 | | - this_cpu_inc(tun->pcpu_stats->rx_dropped); |
|---|
| 1715 | 1751 | return NULL; |
|---|
| 1716 | 1752 | } |
|---|
| 1717 | 1753 | |
|---|
| .. | .. |
|---|
| 1902 | 1938 | } |
|---|
| 1903 | 1939 | |
|---|
| 1904 | 1940 | skb_reset_network_header(skb); |
|---|
| 1905 | | - skb_probe_transport_header(skb, 0); |
|---|
| 1941 | + skb_probe_transport_header(skb); |
|---|
| 1942 | + skb_record_rx_queue(skb, tfile->queue_index); |
|---|
| 1906 | 1943 | |
|---|
| 1907 | 1944 | if (skb_xdp) { |
|---|
| 1908 | 1945 | struct bpf_prog *xdp_prog; |
|---|
| .. | .. |
|---|
| 1947 | 1984 | |
|---|
| 1948 | 1985 | /* Exercise flow dissector code path. */ |
|---|
| 1949 | 1986 | skb_push(skb, ETH_HLEN); |
|---|
| 1950 | | - headlen = eth_get_headlen(skb->data, skb_headlen(skb)); |
|---|
| 1987 | + headlen = eth_get_headlen(tun->dev, skb->data, |
|---|
| 1988 | + skb_headlen(skb)); |
|---|
| 1951 | 1989 | |
|---|
| 1952 | 1990 | if (unlikely(headlen > skb_headlen(skb))) { |
|---|
| 1991 | + WARN_ON_ONCE(1); |
|---|
| 1992 | + err = -ENOMEM; |
|---|
| 1953 | 1993 | this_cpu_inc(tun->pcpu_stats->rx_dropped); |
|---|
| 1994 | +napi_busy: |
|---|
| 1954 | 1995 | napi_free_frags(&tfile->napi); |
|---|
| 1955 | 1996 | rcu_read_unlock(); |
|---|
| 1956 | 1997 | mutex_unlock(&tfile->napi_mutex); |
|---|
| 1957 | | - WARN_ON(1); |
|---|
| 1958 | | - return -ENOMEM; |
|---|
| 1998 | + return err; |
|---|
| 1959 | 1999 | } |
|---|
| 1960 | 2000 | |
|---|
| 1961 | | - local_bh_disable(); |
|---|
| 1962 | | - napi_gro_frags(&tfile->napi); |
|---|
| 1963 | | - local_bh_enable(); |
|---|
| 2001 | + if (likely(napi_schedule_prep(&tfile->napi))) { |
|---|
| 2002 | + local_bh_disable(); |
|---|
| 2003 | + napi_gro_frags(&tfile->napi); |
|---|
| 2004 | + napi_complete(&tfile->napi); |
|---|
| 2005 | + local_bh_enable(); |
|---|
| 2006 | + } else { |
|---|
| 2007 | + err = -EBUSY; |
|---|
| 2008 | + goto napi_busy; |
|---|
| 2009 | + } |
|---|
| 1964 | 2010 | mutex_unlock(&tfile->napi_mutex); |
|---|
| 1965 | 2011 | } else if (tfile->napi_enabled) { |
|---|
| 1966 | 2012 | struct sk_buff_head *queue = &tfile->sk.sk_write_queue; |
|---|
| .. | .. |
|---|
| 1984 | 2030 | |
|---|
| 1985 | 2031 | stats = get_cpu_ptr(tun->pcpu_stats); |
|---|
| 1986 | 2032 | u64_stats_update_begin(&stats->syncp); |
|---|
| 1987 | | - stats->rx_packets++; |
|---|
| 1988 | | - stats->rx_bytes += len; |
|---|
| 2033 | + u64_stats_inc(&stats->rx_packets); |
|---|
| 2034 | + u64_stats_add(&stats->rx_bytes, len); |
|---|
| 1989 | 2035 | u64_stats_update_end(&stats->syncp); |
|---|
| 1990 | 2036 | put_cpu_ptr(stats); |
|---|
| 1991 | 2037 | |
|---|
| .. | .. |
|---|
| 2041 | 2087 | |
|---|
| 2042 | 2088 | stats = get_cpu_ptr(tun->pcpu_stats); |
|---|
| 2043 | 2089 | u64_stats_update_begin(&stats->syncp); |
|---|
| 2044 | | - stats->tx_packets++; |
|---|
| 2045 | | - stats->tx_bytes += ret; |
|---|
| 2090 | + u64_stats_inc(&stats->tx_packets); |
|---|
| 2091 | + u64_stats_add(&stats->tx_bytes, ret); |
|---|
| 2046 | 2092 | u64_stats_update_end(&stats->syncp); |
|---|
| 2047 | 2093 | put_cpu_ptr(tun->pcpu_stats); |
|---|
| 2048 | 2094 | |
|---|
| .. | .. |
|---|
| 2136 | 2182 | /* caller is in process context, */ |
|---|
| 2137 | 2183 | stats = get_cpu_ptr(tun->pcpu_stats); |
|---|
| 2138 | 2184 | u64_stats_update_begin(&stats->syncp); |
|---|
| 2139 | | - stats->tx_packets++; |
|---|
| 2140 | | - stats->tx_bytes += skb->len + vlan_hlen; |
|---|
| 2185 | + u64_stats_inc(&stats->tx_packets); |
|---|
| 2186 | + u64_stats_add(&stats->tx_bytes, skb->len + vlan_hlen); |
|---|
| 2141 | 2187 | u64_stats_update_end(&stats->syncp); |
|---|
| 2142 | 2188 | put_cpu_ptr(tun->pcpu_stats); |
|---|
| 2143 | 2189 | |
|---|
| .. | .. |
|---|
| 2158 | 2204 | goto out; |
|---|
| 2159 | 2205 | } |
|---|
| 2160 | 2206 | |
|---|
| 2161 | | - add_wait_queue(&tfile->wq.wait, &wait); |
|---|
| 2207 | + add_wait_queue(&tfile->socket.wq.wait, &wait); |
|---|
| 2162 | 2208 | |
|---|
| 2163 | 2209 | while (1) { |
|---|
| 2164 | 2210 | set_current_state(TASK_INTERRUPTIBLE); |
|---|
| .. | .. |
|---|
| 2178 | 2224 | } |
|---|
| 2179 | 2225 | |
|---|
| 2180 | 2226 | __set_current_state(TASK_RUNNING); |
|---|
| 2181 | | - remove_wait_queue(&tfile->wq.wait, &wait); |
|---|
| 2227 | + remove_wait_queue(&tfile->socket.wq.wait, &wait); |
|---|
| 2182 | 2228 | |
|---|
| 2183 | 2229 | out: |
|---|
| 2184 | 2230 | *err = error; |
|---|
| .. | .. |
|---|
| 2191 | 2237 | { |
|---|
| 2192 | 2238 | ssize_t ret; |
|---|
| 2193 | 2239 | int err; |
|---|
| 2194 | | - |
|---|
| 2195 | | - tun_debug(KERN_INFO, tun, "tun_do_read\n"); |
|---|
| 2196 | 2240 | |
|---|
| 2197 | 2241 | if (!iov_iter_count(to)) { |
|---|
| 2198 | 2242 | tun_ptr_free(ptr); |
|---|
| .. | .. |
|---|
| 2284 | 2328 | struct tun_struct *tun = netdev_priv(dev); |
|---|
| 2285 | 2329 | |
|---|
| 2286 | 2330 | BUG_ON(!(list_empty(&tun->disabled))); |
|---|
| 2331 | + |
|---|
| 2287 | 2332 | free_percpu(tun->pcpu_stats); |
|---|
| 2333 | + |
|---|
| 2288 | 2334 | tun_flow_uninit(tun); |
|---|
| 2289 | 2335 | security_tun_dev_free_security(tun->security); |
|---|
| 2290 | 2336 | __tun_set_ebpf(tun, &tun->steering_prog, NULL); |
|---|
| .. | .. |
|---|
| 2400 | 2446 | kill_fasync(&tfile->fasync, SIGIO, POLL_OUT); |
|---|
| 2401 | 2447 | } |
|---|
| 2402 | 2448 | |
|---|
| 2449 | +static void tun_put_page(struct tun_page *tpage) |
|---|
| 2450 | +{ |
|---|
| 2451 | + if (tpage->page) |
|---|
| 2452 | + __page_frag_cache_drain(tpage->page, tpage->count); |
|---|
| 2453 | +} |
|---|
| 2454 | + |
|---|
| 2455 | +static int tun_xdp_one(struct tun_struct *tun, |
|---|
| 2456 | + struct tun_file *tfile, |
|---|
| 2457 | + struct xdp_buff *xdp, int *flush, |
|---|
| 2458 | + struct tun_page *tpage) |
|---|
| 2459 | +{ |
|---|
| 2460 | + unsigned int datasize = xdp->data_end - xdp->data; |
|---|
| 2461 | + struct tun_xdp_hdr *hdr = xdp->data_hard_start; |
|---|
| 2462 | + struct virtio_net_hdr *gso = &hdr->gso; |
|---|
| 2463 | + struct tun_pcpu_stats *stats; |
|---|
| 2464 | + struct bpf_prog *xdp_prog; |
|---|
| 2465 | + struct sk_buff *skb = NULL; |
|---|
| 2466 | + u32 rxhash = 0, act; |
|---|
| 2467 | + int buflen = hdr->buflen; |
|---|
| 2468 | + int err = 0; |
|---|
| 2469 | + bool skb_xdp = false; |
|---|
| 2470 | + struct page *page; |
|---|
| 2471 | + |
|---|
| 2472 | + xdp_prog = rcu_dereference(tun->xdp_prog); |
|---|
| 2473 | + if (xdp_prog) { |
|---|
| 2474 | + if (gso->gso_type) { |
|---|
| 2475 | + skb_xdp = true; |
|---|
| 2476 | + goto build; |
|---|
| 2477 | + } |
|---|
| 2478 | + xdp_set_data_meta_invalid(xdp); |
|---|
| 2479 | + xdp->rxq = &tfile->xdp_rxq; |
|---|
| 2480 | + xdp->frame_sz = buflen; |
|---|
| 2481 | + |
|---|
| 2482 | + act = bpf_prog_run_xdp(xdp_prog, xdp); |
|---|
| 2483 | + err = tun_xdp_act(tun, xdp_prog, xdp, act); |
|---|
| 2484 | + if (err < 0) { |
|---|
| 2485 | + put_page(virt_to_head_page(xdp->data)); |
|---|
| 2486 | + return err; |
|---|
| 2487 | + } |
|---|
| 2488 | + |
|---|
| 2489 | + switch (err) { |
|---|
| 2490 | + case XDP_REDIRECT: |
|---|
| 2491 | + *flush = true; |
|---|
| 2492 | + fallthrough; |
|---|
| 2493 | + case XDP_TX: |
|---|
| 2494 | + return 0; |
|---|
| 2495 | + case XDP_PASS: |
|---|
| 2496 | + break; |
|---|
| 2497 | + default: |
|---|
| 2498 | + page = virt_to_head_page(xdp->data); |
|---|
| 2499 | + if (tpage->page == page) { |
|---|
| 2500 | + ++tpage->count; |
|---|
| 2501 | + } else { |
|---|
| 2502 | + tun_put_page(tpage); |
|---|
| 2503 | + tpage->page = page; |
|---|
| 2504 | + tpage->count = 1; |
|---|
| 2505 | + } |
|---|
| 2506 | + return 0; |
|---|
| 2507 | + } |
|---|
| 2508 | + } |
|---|
| 2509 | + |
|---|
| 2510 | +build: |
|---|
| 2511 | + skb = build_skb(xdp->data_hard_start, buflen); |
|---|
| 2512 | + if (!skb) { |
|---|
| 2513 | + err = -ENOMEM; |
|---|
| 2514 | + goto out; |
|---|
| 2515 | + } |
|---|
| 2516 | + |
|---|
| 2517 | + skb_reserve(skb, xdp->data - xdp->data_hard_start); |
|---|
| 2518 | + skb_put(skb, xdp->data_end - xdp->data); |
|---|
| 2519 | + |
|---|
| 2520 | + if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun))) { |
|---|
| 2521 | + this_cpu_inc(tun->pcpu_stats->rx_frame_errors); |
|---|
| 2522 | + kfree_skb(skb); |
|---|
| 2523 | + err = -EINVAL; |
|---|
| 2524 | + goto out; |
|---|
| 2525 | + } |
|---|
| 2526 | + |
|---|
| 2527 | + skb->protocol = eth_type_trans(skb, tun->dev); |
|---|
| 2528 | + skb_reset_network_header(skb); |
|---|
| 2529 | + skb_probe_transport_header(skb); |
|---|
| 2530 | + skb_record_rx_queue(skb, tfile->queue_index); |
|---|
| 2531 | + |
|---|
| 2532 | + if (skb_xdp) { |
|---|
| 2533 | + err = do_xdp_generic(xdp_prog, skb); |
|---|
| 2534 | + if (err != XDP_PASS) |
|---|
| 2535 | + goto out; |
|---|
| 2536 | + } |
|---|
| 2537 | + |
|---|
| 2538 | + if (!rcu_dereference(tun->steering_prog) && tun->numqueues > 1 && |
|---|
| 2539 | + !tfile->detached) |
|---|
| 2540 | + rxhash = __skb_get_hash_symmetric(skb); |
|---|
| 2541 | + |
|---|
| 2542 | + netif_receive_skb(skb); |
|---|
| 2543 | + |
|---|
| 2544 | + /* No need for get_cpu_ptr() here since this function is |
|---|
| 2545 | + * always called with bh disabled |
|---|
| 2546 | + */ |
|---|
| 2547 | + stats = this_cpu_ptr(tun->pcpu_stats); |
|---|
| 2548 | + u64_stats_update_begin(&stats->syncp); |
|---|
| 2549 | + u64_stats_inc(&stats->rx_packets); |
|---|
| 2550 | + u64_stats_add(&stats->rx_bytes, datasize); |
|---|
| 2551 | + u64_stats_update_end(&stats->syncp); |
|---|
| 2552 | + |
|---|
| 2553 | + if (rxhash) |
|---|
| 2554 | + tun_flow_update(tun, rxhash, tfile); |
|---|
| 2555 | + |
|---|
| 2556 | +out: |
|---|
| 2557 | + return err; |
|---|
| 2558 | +} |
|---|
| 2559 | + |
|---|
| 2403 | 2560 | static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) |
|---|
| 2404 | 2561 | { |
|---|
| 2405 | | - int ret; |
|---|
| 2562 | + int ret, i; |
|---|
| 2406 | 2563 | struct tun_file *tfile = container_of(sock, struct tun_file, socket); |
|---|
| 2407 | 2564 | struct tun_struct *tun = tun_get(tfile); |
|---|
| 2565 | + struct tun_msg_ctl *ctl = m->msg_control; |
|---|
| 2566 | + struct xdp_buff *xdp; |
|---|
| 2408 | 2567 | |
|---|
| 2409 | 2568 | if (!tun) |
|---|
| 2410 | 2569 | return -EBADFD; |
|---|
| 2411 | 2570 | |
|---|
| 2412 | | - ret = tun_get_user(tun, tfile, m->msg_control, &m->msg_iter, |
|---|
| 2571 | + if (m->msg_controllen == sizeof(struct tun_msg_ctl) && |
|---|
| 2572 | + ctl && ctl->type == TUN_MSG_PTR) { |
|---|
| 2573 | + struct tun_page tpage; |
|---|
| 2574 | + int n = ctl->num; |
|---|
| 2575 | + int flush = 0; |
|---|
| 2576 | + |
|---|
| 2577 | + memset(&tpage, 0, sizeof(tpage)); |
|---|
| 2578 | + |
|---|
| 2579 | + local_bh_disable(); |
|---|
| 2580 | + rcu_read_lock(); |
|---|
| 2581 | + |
|---|
| 2582 | + for (i = 0; i < n; i++) { |
|---|
| 2583 | + xdp = &((struct xdp_buff *)ctl->ptr)[i]; |
|---|
| 2584 | + tun_xdp_one(tun, tfile, xdp, &flush, &tpage); |
|---|
| 2585 | + } |
|---|
| 2586 | + |
|---|
| 2587 | + if (flush) |
|---|
| 2588 | + xdp_do_flush(); |
|---|
| 2589 | + |
|---|
| 2590 | + rcu_read_unlock(); |
|---|
| 2591 | + local_bh_enable(); |
|---|
| 2592 | + |
|---|
| 2593 | + tun_put_page(&tpage); |
|---|
| 2594 | + |
|---|
| 2595 | + ret = total_len; |
|---|
| 2596 | + goto out; |
|---|
| 2597 | + } |
|---|
| 2598 | + |
|---|
| 2599 | + ret = tun_get_user(tun, tfile, ctl ? ctl->ptr : NULL, &m->msg_iter, |
|---|
| 2413 | 2600 | m->msg_flags & MSG_DONTWAIT, |
|---|
| 2414 | 2601 | m->msg_flags & MSG_MORE); |
|---|
| 2602 | +out: |
|---|
| 2415 | 2603 | tun_put(tun); |
|---|
| 2416 | 2604 | return ret; |
|---|
| 2417 | 2605 | } |
|---|
| .. | .. |
|---|
| 2636 | 2824 | |
|---|
| 2637 | 2825 | if (!dev) |
|---|
| 2638 | 2826 | return -ENOMEM; |
|---|
| 2639 | | - err = dev_get_valid_name(net, dev, name); |
|---|
| 2640 | | - if (err < 0) |
|---|
| 2641 | | - goto err_free_dev; |
|---|
| 2642 | 2827 | |
|---|
| 2643 | 2828 | dev_net_set(dev, net); |
|---|
| 2644 | 2829 | dev->rtnl_link_ops = &tun_link_ops; |
|---|
| .. | .. |
|---|
| 2657 | 2842 | tun->rx_batched = 0; |
|---|
| 2658 | 2843 | RCU_INIT_POINTER(tun->steering_prog, NULL); |
|---|
| 2659 | 2844 | |
|---|
| 2660 | | - tun->pcpu_stats = netdev_alloc_pcpu_stats(struct tun_pcpu_stats); |
|---|
| 2661 | | - if (!tun->pcpu_stats) { |
|---|
| 2662 | | - err = -ENOMEM; |
|---|
| 2663 | | - goto err_free_dev; |
|---|
| 2664 | | - } |
|---|
| 2845 | + tun->ifr = ifr; |
|---|
| 2846 | + tun->file = file; |
|---|
| 2665 | 2847 | |
|---|
| 2666 | | - spin_lock_init(&tun->lock); |
|---|
| 2667 | | - |
|---|
| 2668 | | - err = security_tun_dev_alloc_security(&tun->security); |
|---|
| 2669 | | - if (err < 0) |
|---|
| 2670 | | - goto err_free_stat; |
|---|
| 2671 | | - |
|---|
| 2672 | | - tun_net_init(dev); |
|---|
| 2673 | | - tun_flow_init(tun); |
|---|
| 2674 | | - |
|---|
| 2675 | | - dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | |
|---|
| 2676 | | - TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX | |
|---|
| 2677 | | - NETIF_F_HW_VLAN_STAG_TX; |
|---|
| 2678 | | - dev->features = dev->hw_features | NETIF_F_LLTX; |
|---|
| 2679 | | - dev->vlan_features = dev->features & |
|---|
| 2680 | | - ~(NETIF_F_HW_VLAN_CTAG_TX | |
|---|
| 2681 | | - NETIF_F_HW_VLAN_STAG_TX); |
|---|
| 2682 | | - |
|---|
| 2683 | | - tun->flags = (tun->flags & ~TUN_FEATURES) | |
|---|
| 2684 | | - (ifr->ifr_flags & TUN_FEATURES); |
|---|
| 2685 | | - |
|---|
| 2686 | | - INIT_LIST_HEAD(&tun->disabled); |
|---|
| 2687 | | - err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI, |
|---|
| 2688 | | - ifr->ifr_flags & IFF_NAPI_FRAGS, false); |
|---|
| 2689 | | - if (err < 0) |
|---|
| 2690 | | - goto err_free_flow; |
|---|
| 2848 | + tun_net_initialize(dev); |
|---|
| 2691 | 2849 | |
|---|
| 2692 | 2850 | err = register_netdevice(tun->dev); |
|---|
| 2693 | | - if (err < 0) |
|---|
| 2694 | | - goto err_detach; |
|---|
| 2851 | + if (err < 0) { |
|---|
| 2852 | + free_netdev(dev); |
|---|
| 2853 | + return err; |
|---|
| 2854 | + } |
|---|
| 2695 | 2855 | /* free_netdev() won't check refcnt, to avoid race |
|---|
| 2696 | 2856 | * with dev_put() we need publish tun after registration. |
|---|
| 2697 | 2857 | */ |
|---|
| .. | .. |
|---|
| 2699 | 2859 | } |
|---|
| 2700 | 2860 | |
|---|
| 2701 | 2861 | netif_carrier_on(tun->dev); |
|---|
| 2702 | | - |
|---|
| 2703 | | - tun_debug(KERN_INFO, tun, "tun_set_iff\n"); |
|---|
| 2704 | 2862 | |
|---|
| 2705 | 2863 | /* Make sure persistent devices do not get stuck in |
|---|
| 2706 | 2864 | * xoff state. |
|---|
| .. | .. |
|---|
| 2710 | 2868 | |
|---|
| 2711 | 2869 | strcpy(ifr->ifr_name, tun->dev->name); |
|---|
| 2712 | 2870 | return 0; |
|---|
| 2713 | | - |
|---|
| 2714 | | -err_detach: |
|---|
| 2715 | | - tun_detach_all(dev); |
|---|
| 2716 | | - /* register_netdevice() already called tun_free_netdev() */ |
|---|
| 2717 | | - goto err_free_dev; |
|---|
| 2718 | | - |
|---|
| 2719 | | -err_free_flow: |
|---|
| 2720 | | - tun_flow_uninit(tun); |
|---|
| 2721 | | - security_tun_dev_free_security(tun->security); |
|---|
| 2722 | | -err_free_stat: |
|---|
| 2723 | | - free_percpu(tun->pcpu_stats); |
|---|
| 2724 | | -err_free_dev: |
|---|
| 2725 | | - free_netdev(dev); |
|---|
| 2726 | | - return err; |
|---|
| 2727 | 2871 | } |
|---|
| 2728 | 2872 | |
|---|
| 2729 | | -static void tun_get_iff(struct net *net, struct tun_struct *tun, |
|---|
| 2730 | | - struct ifreq *ifr) |
|---|
| 2873 | +static void tun_get_iff(struct tun_struct *tun, struct ifreq *ifr) |
|---|
| 2731 | 2874 | { |
|---|
| 2732 | | - tun_debug(KERN_INFO, tun, "tun_get_iff\n"); |
|---|
| 2733 | | - |
|---|
| 2734 | 2875 | strcpy(ifr->ifr_name, tun->dev->name); |
|---|
| 2735 | 2876 | |
|---|
| 2736 | 2877 | ifr->ifr_flags = tun_flags(tun); |
|---|
| .. | .. |
|---|
| 2857 | 2998 | return ret; |
|---|
| 2858 | 2999 | } |
|---|
| 2859 | 3000 | |
|---|
| 2860 | | -static int tun_set_ebpf(struct tun_struct *tun, struct tun_prog **prog_p, |
|---|
| 3001 | +static int tun_set_ebpf(struct tun_struct *tun, struct tun_prog __rcu **prog_p, |
|---|
| 2861 | 3002 | void __user *data) |
|---|
| 2862 | 3003 | { |
|---|
| 2863 | 3004 | struct bpf_prog *prog; |
|---|
| .. | .. |
|---|
| 2923 | 3064 | struct net *net = sock_net(&tfile->sk); |
|---|
| 2924 | 3065 | struct tun_struct *tun; |
|---|
| 2925 | 3066 | void __user* argp = (void __user*)arg; |
|---|
| 3067 | + unsigned int ifindex, carrier; |
|---|
| 2926 | 3068 | struct ifreq ifr; |
|---|
| 2927 | 3069 | kuid_t owner; |
|---|
| 2928 | 3070 | kgid_t group; |
|---|
| 2929 | 3071 | int sndbuf; |
|---|
| 2930 | 3072 | int vnet_hdr_sz; |
|---|
| 2931 | | - unsigned int ifindex; |
|---|
| 2932 | 3073 | int le; |
|---|
| 2933 | 3074 | int ret; |
|---|
| 2934 | 3075 | bool do_notify = false; |
|---|
| .. | .. |
|---|
| 2993 | 3134 | if (!tun) |
|---|
| 2994 | 3135 | goto unlock; |
|---|
| 2995 | 3136 | |
|---|
| 2996 | | - tun_debug(KERN_INFO, tun, "tun_chr_ioctl cmd %u\n", cmd); |
|---|
| 3137 | + netif_info(tun, drv, tun->dev, "tun_chr_ioctl cmd %u\n", cmd); |
|---|
| 2997 | 3138 | |
|---|
| 3139 | + net = dev_net(tun->dev); |
|---|
| 2998 | 3140 | ret = 0; |
|---|
| 2999 | 3141 | switch (cmd) { |
|---|
| 3000 | 3142 | case TUNGETIFF: |
|---|
| 3001 | | - tun_get_iff(current->nsproxy->net_ns, tun, &ifr); |
|---|
| 3143 | + tun_get_iff(tun, &ifr); |
|---|
| 3002 | 3144 | |
|---|
| 3003 | 3145 | if (tfile->detached) |
|---|
| 3004 | 3146 | ifr.ifr_flags |= IFF_DETACH_QUEUE; |
|---|
| .. | .. |
|---|
| 3013 | 3155 | /* Disable/Enable checksum */ |
|---|
| 3014 | 3156 | |
|---|
| 3015 | 3157 | /* [unimplemented] */ |
|---|
| 3016 | | - tun_debug(KERN_INFO, tun, "ignored: set checksum %s\n", |
|---|
| 3017 | | - arg ? "disabled" : "enabled"); |
|---|
| 3158 | + netif_info(tun, drv, tun->dev, "ignored: set checksum %s\n", |
|---|
| 3159 | + arg ? "disabled" : "enabled"); |
|---|
| 3018 | 3160 | break; |
|---|
| 3019 | 3161 | |
|---|
| 3020 | 3162 | case TUNSETPERSIST: |
|---|
| .. | .. |
|---|
| 3032 | 3174 | do_notify = true; |
|---|
| 3033 | 3175 | } |
|---|
| 3034 | 3176 | |
|---|
| 3035 | | - tun_debug(KERN_INFO, tun, "persist %s\n", |
|---|
| 3036 | | - arg ? "enabled" : "disabled"); |
|---|
| 3177 | + netif_info(tun, drv, tun->dev, "persist %s\n", |
|---|
| 3178 | + arg ? "enabled" : "disabled"); |
|---|
| 3037 | 3179 | break; |
|---|
| 3038 | 3180 | |
|---|
| 3039 | 3181 | case TUNSETOWNER: |
|---|
| .. | .. |
|---|
| 3045 | 3187 | } |
|---|
| 3046 | 3188 | tun->owner = owner; |
|---|
| 3047 | 3189 | do_notify = true; |
|---|
| 3048 | | - tun_debug(KERN_INFO, tun, "owner set to %u\n", |
|---|
| 3049 | | - from_kuid(&init_user_ns, tun->owner)); |
|---|
| 3190 | + netif_info(tun, drv, tun->dev, "owner set to %u\n", |
|---|
| 3191 | + from_kuid(&init_user_ns, tun->owner)); |
|---|
| 3050 | 3192 | break; |
|---|
| 3051 | 3193 | |
|---|
| 3052 | 3194 | case TUNSETGROUP: |
|---|
| .. | .. |
|---|
| 3058 | 3200 | } |
|---|
| 3059 | 3201 | tun->group = group; |
|---|
| 3060 | 3202 | do_notify = true; |
|---|
| 3061 | | - tun_debug(KERN_INFO, tun, "group set to %u\n", |
|---|
| 3062 | | - from_kgid(&init_user_ns, tun->group)); |
|---|
| 3203 | + netif_info(tun, drv, tun->dev, "group set to %u\n", |
|---|
| 3204 | + from_kgid(&init_user_ns, tun->group)); |
|---|
| 3063 | 3205 | break; |
|---|
| 3064 | 3206 | |
|---|
| 3065 | 3207 | case TUNSETLINK: |
|---|
| 3066 | 3208 | /* Only allow setting the type when the interface is down */ |
|---|
| 3067 | 3209 | if (tun->dev->flags & IFF_UP) { |
|---|
| 3068 | | - tun_debug(KERN_INFO, tun, |
|---|
| 3069 | | - "Linktype set failed because interface is up\n"); |
|---|
| 3210 | + netif_info(tun, drv, tun->dev, |
|---|
| 3211 | + "Linktype set failed because interface is up\n"); |
|---|
| 3070 | 3212 | ret = -EBUSY; |
|---|
| 3071 | 3213 | } else { |
|---|
| 3072 | 3214 | tun->dev->type = (int) arg; |
|---|
| 3073 | 3215 | tun->dev->addr_len = tun_get_addr_len(tun->dev->type); |
|---|
| 3074 | | - tun_debug(KERN_INFO, tun, "linktype set to %d\n", |
|---|
| 3075 | | - tun->dev->type); |
|---|
| 3216 | + netif_info(tun, drv, tun->dev, "linktype set to %d\n", |
|---|
| 3217 | + tun->dev->type); |
|---|
| 3076 | 3218 | ret = 0; |
|---|
| 3077 | 3219 | } |
|---|
| 3078 | 3220 | break; |
|---|
| 3079 | 3221 | |
|---|
| 3080 | | -#ifdef TUN_DEBUG |
|---|
| 3081 | 3222 | case TUNSETDEBUG: |
|---|
| 3082 | | - tun->debug = arg; |
|---|
| 3223 | + tun->msg_enable = (u32)arg; |
|---|
| 3083 | 3224 | break; |
|---|
| 3084 | | -#endif |
|---|
| 3225 | + |
|---|
| 3085 | 3226 | case TUNSETOFFLOAD: |
|---|
| 3086 | 3227 | ret = set_offload(tun, arg); |
|---|
| 3087 | 3228 | break; |
|---|
| .. | .. |
|---|
| 3096 | 3237 | |
|---|
| 3097 | 3238 | case SIOCGIFHWADDR: |
|---|
| 3098 | 3239 | /* Get hw address */ |
|---|
| 3099 | | - memcpy(ifr.ifr_hwaddr.sa_data, tun->dev->dev_addr, ETH_ALEN); |
|---|
| 3100 | | - ifr.ifr_hwaddr.sa_family = tun->dev->type; |
|---|
| 3240 | + dev_get_mac_address(&ifr.ifr_hwaddr, net, tun->dev->name); |
|---|
| 3101 | 3241 | if (copy_to_user(argp, &ifr, ifreq_len)) |
|---|
| 3102 | 3242 | ret = -EFAULT; |
|---|
| 3103 | 3243 | break; |
|---|
| 3104 | 3244 | |
|---|
| 3105 | 3245 | case SIOCSIFHWADDR: |
|---|
| 3106 | 3246 | /* Set hw address */ |
|---|
| 3107 | | - tun_debug(KERN_DEBUG, tun, "set hw address: %pM\n", |
|---|
| 3108 | | - ifr.ifr_hwaddr.sa_data); |
|---|
| 3109 | | - |
|---|
| 3110 | | - ret = dev_set_mac_address(tun->dev, &ifr.ifr_hwaddr); |
|---|
| 3247 | + ret = dev_set_mac_address_user(tun->dev, &ifr.ifr_hwaddr, NULL); |
|---|
| 3111 | 3248 | break; |
|---|
| 3112 | 3249 | |
|---|
| 3113 | 3250 | case TUNGETSNDBUF: |
|---|
| .. | .. |
|---|
| 3213 | 3350 | ret = tun_set_ebpf(tun, &tun->filter_prog, argp); |
|---|
| 3214 | 3351 | break; |
|---|
| 3215 | 3352 | |
|---|
| 3353 | + case TUNSETCARRIER: |
|---|
| 3354 | + ret = -EFAULT; |
|---|
| 3355 | + if (copy_from_user(&carrier, argp, sizeof(carrier))) |
|---|
| 3356 | + goto unlock; |
|---|
| 3357 | + |
|---|
| 3358 | + ret = tun_net_change_carrier(tun->dev, (bool)carrier); |
|---|
| 3359 | + break; |
|---|
| 3360 | + |
|---|
| 3361 | + case TUNGETDEVNETNS: |
|---|
| 3362 | + ret = -EPERM; |
|---|
| 3363 | + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) |
|---|
| 3364 | + goto unlock; |
|---|
| 3365 | + ret = open_related_ns(&net->ns, get_net_ns); |
|---|
| 3366 | + break; |
|---|
| 3367 | + |
|---|
| 3216 | 3368 | default: |
|---|
| 3217 | 3369 | ret = -EINVAL; |
|---|
| 3218 | 3370 | break; |
|---|
| .. | .. |
|---|
| 3286 | 3438 | struct net *net = current->nsproxy->net_ns; |
|---|
| 3287 | 3439 | struct tun_file *tfile; |
|---|
| 3288 | 3440 | |
|---|
| 3289 | | - DBG1(KERN_INFO, "tunX: tun_chr_open\n"); |
|---|
| 3290 | | - |
|---|
| 3291 | 3441 | tfile = (struct tun_file *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, |
|---|
| 3292 | 3442 | &tun_proto, 0); |
|---|
| 3293 | 3443 | if (!tfile) |
|---|
| .. | .. |
|---|
| 3302 | 3452 | tfile->flags = 0; |
|---|
| 3303 | 3453 | tfile->ifindex = 0; |
|---|
| 3304 | 3454 | |
|---|
| 3305 | | - init_waitqueue_head(&tfile->wq.wait); |
|---|
| 3306 | | - RCU_INIT_POINTER(tfile->socket.wq, &tfile->wq); |
|---|
| 3455 | + init_waitqueue_head(&tfile->socket.wq.wait); |
|---|
| 3307 | 3456 | |
|---|
| 3308 | 3457 | tfile->socket.file = file; |
|---|
| 3309 | 3458 | tfile->socket.ops = &tun_socket_ops; |
|---|
| 3310 | 3459 | |
|---|
| 3311 | | - sock_init_data(&tfile->socket, &tfile->sk); |
|---|
| 3460 | + sock_init_data_uid(&tfile->socket, &tfile->sk, current_fsuid()); |
|---|
| 3312 | 3461 | |
|---|
| 3313 | 3462 | tfile->sk.sk_write_space = tun_sock_write_space; |
|---|
| 3314 | 3463 | tfile->sk.sk_sndbuf = INT_MAX; |
|---|
| .. | .. |
|---|
| 3342 | 3491 | rtnl_lock(); |
|---|
| 3343 | 3492 | tun = tun_get(tfile); |
|---|
| 3344 | 3493 | if (tun) |
|---|
| 3345 | | - tun_get_iff(current->nsproxy->net_ns, tun, &ifr); |
|---|
| 3494 | + tun_get_iff(tun, &ifr); |
|---|
| 3346 | 3495 | rtnl_unlock(); |
|---|
| 3347 | 3496 | |
|---|
| 3348 | 3497 | if (tun) |
|---|
| .. | .. |
|---|
| 3428 | 3577 | |
|---|
| 3429 | 3578 | static u32 tun_get_msglevel(struct net_device *dev) |
|---|
| 3430 | 3579 | { |
|---|
| 3431 | | -#ifdef TUN_DEBUG |
|---|
| 3432 | 3580 | struct tun_struct *tun = netdev_priv(dev); |
|---|
| 3433 | | - return tun->debug; |
|---|
| 3434 | | -#else |
|---|
| 3435 | | - return -EOPNOTSUPP; |
|---|
| 3436 | | -#endif |
|---|
| 3581 | + |
|---|
| 3582 | + return tun->msg_enable; |
|---|
| 3437 | 3583 | } |
|---|
| 3438 | 3584 | |
|---|
| 3439 | 3585 | static void tun_set_msglevel(struct net_device *dev, u32 value) |
|---|
| 3440 | 3586 | { |
|---|
| 3441 | | -#ifdef TUN_DEBUG |
|---|
| 3442 | 3587 | struct tun_struct *tun = netdev_priv(dev); |
|---|
| 3443 | | - tun->debug = value; |
|---|
| 3444 | | -#endif |
|---|
| 3588 | + |
|---|
| 3589 | + tun->msg_enable = value; |
|---|
| 3445 | 3590 | } |
|---|
| 3446 | 3591 | |
|---|
| 3447 | 3592 | static int tun_get_coalesce(struct net_device *dev, |
|---|
| .. | .. |
|---|
| 3468 | 3613 | } |
|---|
| 3469 | 3614 | |
|---|
| 3470 | 3615 | static const struct ethtool_ops tun_ethtool_ops = { |
|---|
| 3616 | + .supported_coalesce_params = ETHTOOL_COALESCE_RX_MAX_FRAMES, |
|---|
| 3471 | 3617 | .get_drvinfo = tun_get_drvinfo, |
|---|
| 3472 | 3618 | .get_msglevel = tun_get_msglevel, |
|---|
| 3473 | 3619 | .set_msglevel = tun_set_msglevel, |
|---|