hc
2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/drivers/net/tun.c
....@@ -1,16 +1,7 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * TUN - Universal TUN/TAP device driver.
34 * Copyright (C) 1999-2002 Maxim Krasnyansky <maxk@qualcomm.com>
4
- *
5
- * This program is free software; you can redistribute it and/or modify
6
- * it under the terms of the GNU General Public License as published by
7
- * the Free Software Foundation; either version 2 of the License, or
8
- * (at your option) any later version.
9
- *
10
- * This program is distributed in the hope that it will be useful,
11
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
- * GNU General Public License for more details.
145 *
156 * $Id: tun.c,v 1.15 2002/03/01 02:44:24 maxk Exp $
167 */
....@@ -71,6 +62,7 @@
7162 #include <net/rtnetlink.h>
7263 #include <net/sock.h>
7364 #include <net/xdp.h>
65
+#include <net/ip_tunnels.h>
7466 #include <linux/seq_file.h>
7567 #include <linux/uio.h>
7668 #include <linux/skb_array.h>
....@@ -92,36 +84,6 @@
9284 static void tun_default_link_ksettings(struct net_device *dev,
9385 struct ethtool_link_ksettings *cmd);
9486
95
-/* Uncomment to enable debugging */
96
-/* #define TUN_DEBUG 1 */
97
-
98
-#ifdef TUN_DEBUG
99
-static int debug;
100
-
101
-#define tun_debug(level, tun, fmt, args...) \
102
-do { \
103
- if (tun->debug) \
104
- netdev_printk(level, tun->dev, fmt, ##args); \
105
-} while (0)
106
-#define DBG1(level, fmt, args...) \
107
-do { \
108
- if (debug == 2) \
109
- printk(level fmt, ##args); \
110
-} while (0)
111
-#else
112
-#define tun_debug(level, tun, fmt, args...) \
113
-do { \
114
- if (0) \
115
- netdev_printk(level, tun->dev, fmt, ##args); \
116
-} while (0)
117
-#define DBG1(level, fmt, args...) \
118
-do { \
119
- if (0) \
120
- printk(level fmt, ##args); \
121
-} while (0)
122
-#endif
123
-
124
-#define TUN_HEADROOM 256
12587 #define TUN_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
12688
12789 /* TUN device flags */
....@@ -154,10 +116,10 @@
154116 #define TUN_FLOW_EXPIRE (3 * HZ)
155117
156118 struct tun_pcpu_stats {
157
- u64 rx_packets;
158
- u64 rx_bytes;
159
- u64 tx_packets;
160
- u64 tx_bytes;
119
+ u64_stats_t rx_packets;
120
+ u64_stats_t rx_bytes;
121
+ u64_stats_t tx_packets;
122
+ u64_stats_t tx_bytes;
161123 struct u64_stats_sync syncp;
162124 u32 rx_dropped;
163125 u32 tx_dropped;
....@@ -178,7 +140,6 @@
178140 struct tun_file {
179141 struct sock sk;
180142 struct socket socket;
181
- struct socket_wq wq;
182143 struct tun_struct __rcu *tun;
183144 struct fasync_struct *fasync;
184145 /* only used for fasnyc */
....@@ -197,6 +158,11 @@
197158 struct xdp_rxq_info xdp_rxq;
198159 };
199160
161
+struct tun_page {
162
+ struct page *page;
163
+ int count;
164
+};
165
+
200166 struct tun_flow_entry {
201167 struct hlist_node hash_link;
202168 struct rcu_head rcu;
....@@ -205,7 +171,7 @@
205171 u32 rxhash;
206172 u32 rps_rxhash;
207173 int queue_index;
208
- unsigned long updated;
174
+ unsigned long updated ____cacheline_aligned_in_smp;
209175 };
210176
211177 #define TUN_NUM_FLOW_ENTRIES 1024
....@@ -239,9 +205,7 @@
239205 struct sock_fprog fprog;
240206 /* protected by rtnl lock */
241207 bool filter_attached;
242
-#ifdef TUN_DEBUG
243
- int debug;
244
-#endif
208
+ u32 msg_enable;
245209 spinlock_t lock;
246210 struct hlist_head flows[TUN_NUM_FLOW_ENTRIES];
247211 struct timer_list flow_gc_timer;
....@@ -256,6 +220,9 @@
256220 struct tun_prog __rcu *steering_prog;
257221 struct tun_prog __rcu *filter_prog;
258222 struct ethtool_link_ksettings link_ksettings;
223
+ /* init args */
224
+ struct file *file;
225
+ struct ifreq *ifr;
259226 };
260227
261228 struct veth {
....@@ -263,23 +230,8 @@
263230 __be16 h_vlan_TCI;
264231 };
265232
266
-bool tun_is_xdp_frame(void *ptr)
267
-{
268
- return (unsigned long)ptr & TUN_XDP_FLAG;
269
-}
270
-EXPORT_SYMBOL(tun_is_xdp_frame);
271
-
272
-void *tun_xdp_to_ptr(void *ptr)
273
-{
274
- return (void *)((unsigned long)ptr | TUN_XDP_FLAG);
275
-}
276
-EXPORT_SYMBOL(tun_xdp_to_ptr);
277
-
278
-void *tun_ptr_to_xdp(void *ptr)
279
-{
280
- return (void *)((unsigned long)ptr & ~TUN_XDP_FLAG);
281
-}
282
-EXPORT_SYMBOL(tun_ptr_to_xdp);
233
+static void tun_flow_init(struct tun_struct *tun);
234
+static void tun_flow_uninit(struct tun_struct *tun);
283235
284236 static int tun_napi_receive(struct napi_struct *napi, int budget)
285237 {
....@@ -331,6 +283,12 @@
331283 NAPI_POLL_WEIGHT);
332284 napi_enable(&tfile->napi);
333285 }
286
+}
287
+
288
+static void tun_napi_enable(struct tun_file *tfile)
289
+{
290
+ if (tfile->napi_enabled)
291
+ napi_enable(&tfile->napi);
334292 }
335293
336294 static void tun_napi_disable(struct tun_file *tfile)
....@@ -437,8 +395,9 @@
437395 struct tun_flow_entry *e = kmalloc(sizeof(*e), GFP_ATOMIC);
438396
439397 if (e) {
440
- tun_debug(KERN_INFO, tun, "create flow: hash %u index %u\n",
441
- rxhash, queue_index);
398
+ netif_info(tun, tx_queued, tun->dev,
399
+ "create flow: hash %u index %u\n",
400
+ rxhash, queue_index);
442401 e->updated = jiffies;
443402 e->rxhash = rxhash;
444403 e->rps_rxhash = 0;
....@@ -452,8 +411,8 @@
452411
453412 static void tun_flow_delete(struct tun_struct *tun, struct tun_flow_entry *e)
454413 {
455
- tun_debug(KERN_INFO, tun, "delete flow: hash %u index %u\n",
456
- e->rxhash, e->queue_index);
414
+ netif_info(tun, tx_queued, tun->dev, "delete flow: hash %u index %u\n",
415
+ e->rxhash, e->queue_index);
457416 hlist_del_rcu(&e->hash_link);
458417 kfree_rcu(e, rcu);
459418 --tun->flow_count;
....@@ -499,8 +458,6 @@
499458 unsigned long count = 0;
500459 int i;
501460
502
- tun_debug(KERN_INFO, tun, "tun_flow_cleanup\n");
503
-
504461 spin_lock(&tun->lock);
505462 for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
506463 struct tun_flow_entry *e;
....@@ -533,18 +490,17 @@
533490 unsigned long delay = tun->ageing_time;
534491 u16 queue_index = tfile->queue_index;
535492
536
- if (!rxhash)
537
- return;
538
- else
539
- head = &tun->flows[tun_hashfn(rxhash)];
493
+ head = &tun->flows[tun_hashfn(rxhash)];
540494
541495 rcu_read_lock();
542496
543497 e = tun_flow_find(head, rxhash);
544498 if (likely(e)) {
545499 /* TODO: keep queueing to old queue until it's empty? */
546
- e->queue_index = queue_index;
547
- e->updated = jiffies;
500
+ if (READ_ONCE(e->queue_index) != queue_index)
501
+ WRITE_ONCE(e->queue_index, queue_index);
502
+ if (e->updated != jiffies)
503
+ e->updated = jiffies;
548504 sock_rps_record_flow_hash(e->rps_rxhash);
549505 } else {
550506 spin_lock_bh(&tun->lock);
....@@ -561,8 +517,7 @@
561517 rcu_read_unlock();
562518 }
563519
564
-/**
565
- * Save the hash received in the stack receive path and update the
520
+/* Save the hash received in the stack receive path and update the
566521 * flow_hash table accordingly.
567522 */
568523 static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash)
....@@ -571,12 +526,11 @@
571526 e->rps_rxhash = hash;
572527 }
573528
574
-/* We try to identify a flow through its rxhash first. The reason that
529
+/* We try to identify a flow through its rxhash. The reason that
575530 * we do not check rxq no. is because some cards(e.g 82599), chooses
576531 * the rxq based on the txq where the last packet of the flow comes. As
577532 * the userspace application move between processors, we may get a
578
- * different rxq no. here. If we could not get rxhash, then we would
579
- * hope the rxq no. may help here.
533
+ * different rxq no. here.
580534 */
581535 static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
582536 {
....@@ -587,18 +541,13 @@
587541 numqueues = READ_ONCE(tun->numqueues);
588542
589543 txq = __skb_get_hash_symmetric(skb);
590
- if (txq) {
591
- e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq);
592
- if (e) {
593
- tun_flow_save_rps_rxhash(e, txq);
594
- txq = e->queue_index;
595
- } else
596
- /* use multiply and shift instead of expensive divide */
597
- txq = ((u64)txq * numqueues) >> 32;
598
- } else if (likely(skb_rx_queue_recorded(skb))) {
599
- txq = skb_get_rx_queue(skb);
600
- while (unlikely(txq >= numqueues))
601
- txq -= numqueues;
544
+ e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq);
545
+ if (e) {
546
+ tun_flow_save_rps_rxhash(e, txq);
547
+ txq = e->queue_index;
548
+ } else {
549
+ /* use multiply and shift instead of expensive divide */
550
+ txq = ((u64)txq * numqueues) >> 32;
602551 }
603552
604553 return txq;
....@@ -622,8 +571,7 @@
622571 }
623572
624573 static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
625
- struct net_device *sb_dev,
626
- select_queue_fallback_t fallback)
574
+ struct net_device *sb_dev)
627575 {
628576 struct tun_struct *tun = netdev_priv(dev);
629577 u16 ret;
....@@ -704,7 +652,8 @@
704652 tun = rtnl_dereference(tfile->tun);
705653
706654 if (tun && clean) {
707
- tun_napi_disable(tfile);
655
+ if (!tfile->detached)
656
+ tun_napi_disable(tfile);
708657 tun_napi_del(tfile);
709658 }
710659
....@@ -723,8 +672,10 @@
723672 if (clean) {
724673 RCU_INIT_POINTER(tfile->tun, NULL);
725674 sock_put(&tfile->sk);
726
- } else
675
+ } else {
727676 tun_disable_queue(tun, tfile);
677
+ tun_napi_disable(tfile);
678
+ }
728679
729680 synchronize_net();
730681 tun_flow_delete_by_queue(tun, tun->numqueues + 1);
....@@ -747,7 +698,6 @@
747698 if (tun)
748699 xdp_rxq_info_unreg(&tfile->xdp_rxq);
749700 ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free);
750
- sock_put(&tfile->sk);
751701 }
752702 }
753703
....@@ -763,6 +713,9 @@
763713 if (dev)
764714 netdev_state_change(dev);
765715 rtnl_unlock();
716
+
717
+ if (clean)
718
+ sock_put(&tfile->sk);
766719 }
767720
768721 static void tun_detach_all(struct net_device *dev)
....@@ -797,6 +750,7 @@
797750 sock_put(&tfile->sk);
798751 }
799752 list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) {
753
+ tun_napi_del(tfile);
800754 tun_enable_queue(tfile);
801755 tun_queue_purge(tfile);
802756 xdp_rxq_info_unreg(&tfile->xdp_rxq);
....@@ -877,10 +831,14 @@
877831
878832 if (tfile->detached) {
879833 tun_enable_queue(tfile);
834
+ tun_napi_enable(tfile);
880835 } else {
881836 sock_hold(&tfile->sk);
882837 tun_napi_init(tun, tfile, napi, napi_frags);
883838 }
839
+
840
+ if (rtnl_dereference(tun->xdp_prog))
841
+ sock_set_flag(&tfile->sk, SOCK_XDP);
884842
885843 /* device is allowed to go away first, so no need to hold extra
886844 * refcnt.
....@@ -1025,6 +983,49 @@
1025983
1026984 static const struct ethtool_ops tun_ethtool_ops;
1027985
986
+static int tun_net_init(struct net_device *dev)
987
+{
988
+ struct tun_struct *tun = netdev_priv(dev);
989
+ struct ifreq *ifr = tun->ifr;
990
+ int err;
991
+
992
+ tun->pcpu_stats = netdev_alloc_pcpu_stats(struct tun_pcpu_stats);
993
+ if (!tun->pcpu_stats)
994
+ return -ENOMEM;
995
+
996
+ spin_lock_init(&tun->lock);
997
+
998
+ err = security_tun_dev_alloc_security(&tun->security);
999
+ if (err < 0) {
1000
+ free_percpu(tun->pcpu_stats);
1001
+ return err;
1002
+ }
1003
+
1004
+ tun_flow_init(tun);
1005
+
1006
+ dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST |
1007
+ TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
1008
+ NETIF_F_HW_VLAN_STAG_TX;
1009
+ dev->features = dev->hw_features | NETIF_F_LLTX;
1010
+ dev->vlan_features = dev->features &
1011
+ ~(NETIF_F_HW_VLAN_CTAG_TX |
1012
+ NETIF_F_HW_VLAN_STAG_TX);
1013
+
1014
+ tun->flags = (tun->flags & ~TUN_FEATURES) |
1015
+ (ifr->ifr_flags & TUN_FEATURES);
1016
+
1017
+ INIT_LIST_HEAD(&tun->disabled);
1018
+ err = tun_attach(tun, tun->file, false, ifr->ifr_flags & IFF_NAPI,
1019
+ ifr->ifr_flags & IFF_NAPI_FRAGS, false);
1020
+ if (err < 0) {
1021
+ tun_flow_uninit(tun);
1022
+ security_tun_dev_free_security(tun->security);
1023
+ free_percpu(tun->pcpu_stats);
1024
+ return err;
1025
+ }
1026
+ return 0;
1027
+}
1028
+
10281029 /* Net device detach from fd. */
10291030 static void tun_net_uninit(struct net_device *dev)
10301031 {
....@@ -1050,20 +1051,17 @@
10501051 static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb)
10511052 {
10521053 #ifdef CONFIG_RPS
1053
- if (tun->numqueues == 1 && static_key_false(&rps_needed)) {
1054
+ if (tun->numqueues == 1 && static_branch_unlikely(&rps_needed)) {
10541055 /* Select queue was not called for the skbuff, so we extract the
10551056 * RPS hash and save it into the flow_table here.
10561057 */
1058
+ struct tun_flow_entry *e;
10571059 __u32 rxhash;
10581060
10591061 rxhash = __skb_get_hash_symmetric(skb);
1060
- if (rxhash) {
1061
- struct tun_flow_entry *e;
1062
- e = tun_flow_find(&tun->flows[tun_hashfn(rxhash)],
1063
- rxhash);
1064
- if (e)
1065
- tun_flow_save_rps_rxhash(e, rxhash);
1066
- }
1062
+ e = tun_flow_find(&tun->flows[tun_hashfn(rxhash)], rxhash);
1063
+ if (e)
1064
+ tun_flow_save_rps_rxhash(e, rxhash);
10671065 }
10681066 #endif
10691067 }
....@@ -1099,9 +1097,7 @@
10991097 if (!rcu_dereference(tun->steering_prog))
11001098 tun_automq_xmit(tun, skb);
11011099
1102
- tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len);
1103
-
1104
- BUG_ON(!tfile);
1100
+ netif_info(tun, tx_queued, tun->dev, "%s %d\n", __func__, skb->len);
11051101
11061102 /* Drop if the filter does not like it.
11071103 * This is a noop if the filter is disabled.
....@@ -1127,7 +1123,7 @@
11271123 */
11281124 skb_orphan(skb);
11291125
1130
- nf_reset(skb);
1126
+ nf_reset_ct(skb);
11311127
11321128 if (ptr_ring_produce(&tfile->tx_ring, skb))
11331129 goto drop;
....@@ -1194,10 +1190,10 @@
11941190 p = per_cpu_ptr(tun->pcpu_stats, i);
11951191 do {
11961192 start = u64_stats_fetch_begin(&p->syncp);
1197
- rxpackets = p->rx_packets;
1198
- rxbytes = p->rx_bytes;
1199
- txpackets = p->tx_packets;
1200
- txbytes = p->tx_bytes;
1193
+ rxpackets = u64_stats_read(&p->rx_packets);
1194
+ rxbytes = u64_stats_read(&p->rx_bytes);
1195
+ txpackets = u64_stats_read(&p->tx_packets);
1196
+ txbytes = u64_stats_read(&p->tx_bytes);
12011197 } while (u64_stats_fetch_retry(&p->syncp, start));
12021198
12031199 stats->rx_packets += rxpackets;
....@@ -1219,24 +1215,28 @@
12191215 struct netlink_ext_ack *extack)
12201216 {
12211217 struct tun_struct *tun = netdev_priv(dev);
1218
+ struct tun_file *tfile;
12221219 struct bpf_prog *old_prog;
1220
+ int i;
12231221
12241222 old_prog = rtnl_dereference(tun->xdp_prog);
12251223 rcu_assign_pointer(tun->xdp_prog, prog);
12261224 if (old_prog)
12271225 bpf_prog_put(old_prog);
12281226
1229
- return 0;
1230
-}
1231
-
1232
-static u32 tun_xdp_query(struct net_device *dev)
1233
-{
1234
- struct tun_struct *tun = netdev_priv(dev);
1235
- const struct bpf_prog *xdp_prog;
1236
-
1237
- xdp_prog = rtnl_dereference(tun->xdp_prog);
1238
- if (xdp_prog)
1239
- return xdp_prog->aux->id;
1227
+ for (i = 0; i < tun->numqueues; i++) {
1228
+ tfile = rtnl_dereference(tun->tfiles[i]);
1229
+ if (prog)
1230
+ sock_set_flag(&tfile->sk, SOCK_XDP);
1231
+ else
1232
+ sock_reset_flag(&tfile->sk, SOCK_XDP);
1233
+ }
1234
+ list_for_each_entry(tfile, &tun->disabled, next) {
1235
+ if (prog)
1236
+ sock_set_flag(&tfile->sk, SOCK_XDP);
1237
+ else
1238
+ sock_reset_flag(&tfile->sk, SOCK_XDP);
1239
+ }
12401240
12411241 return 0;
12421242 }
....@@ -1246,15 +1246,28 @@
12461246 switch (xdp->command) {
12471247 case XDP_SETUP_PROG:
12481248 return tun_xdp_set(dev, xdp->prog, xdp->extack);
1249
- case XDP_QUERY_PROG:
1250
- xdp->prog_id = tun_xdp_query(dev);
1251
- return 0;
12521249 default:
12531250 return -EINVAL;
12541251 }
12551252 }
12561253
1254
+static int tun_net_change_carrier(struct net_device *dev, bool new_carrier)
1255
+{
1256
+ if (new_carrier) {
1257
+ struct tun_struct *tun = netdev_priv(dev);
1258
+
1259
+ if (!tun->numqueues)
1260
+ return -EPERM;
1261
+
1262
+ netif_carrier_on(dev);
1263
+ } else {
1264
+ netif_carrier_off(dev);
1265
+ }
1266
+ return 0;
1267
+}
1268
+
12571269 static const struct net_device_ops tun_netdev_ops = {
1270
+ .ndo_init = tun_net_init,
12581271 .ndo_uninit = tun_net_uninit,
12591272 .ndo_open = tun_net_open,
12601273 .ndo_stop = tun_net_close,
....@@ -1263,6 +1276,7 @@
12631276 .ndo_select_queue = tun_select_queue,
12641277 .ndo_set_rx_headroom = tun_set_headroom,
12651278 .ndo_get_stats64 = tun_net_get_stats64,
1279
+ .ndo_change_carrier = tun_net_change_carrier,
12661280 };
12671281
12681282 static void __tun_xdp_flush_tfile(struct tun_file *tfile)
....@@ -1325,7 +1339,7 @@
13251339
13261340 static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
13271341 {
1328
- struct xdp_frame *frame = convert_to_xdp_frame(xdp);
1342
+ struct xdp_frame *frame = xdp_convert_buff_to_frame(xdp);
13291343
13301344 if (unlikely(!frame))
13311345 return -EOVERFLOW;
....@@ -1334,6 +1348,7 @@
13341348 }
13351349
13361350 static const struct net_device_ops tap_netdev_ops = {
1351
+ .ndo_init = tun_net_init,
13371352 .ndo_uninit = tun_net_uninit,
13381353 .ndo_open = tun_net_open,
13391354 .ndo_stop = tun_net_close,
....@@ -1348,6 +1363,7 @@
13481363 .ndo_get_stats64 = tun_net_get_stats64,
13491364 .ndo_bpf = tun_xdp,
13501365 .ndo_xdp_xmit = tun_xdp_xmit,
1366
+ .ndo_change_carrier = tun_net_change_carrier,
13511367 };
13521368
13531369 static void tun_flow_init(struct tun_struct *tun)
....@@ -1373,13 +1389,14 @@
13731389 #define MAX_MTU 65535
13741390
13751391 /* Initialize net device. */
1376
-static void tun_net_init(struct net_device *dev)
1392
+static void tun_net_initialize(struct net_device *dev)
13771393 {
13781394 struct tun_struct *tun = netdev_priv(dev);
13791395
13801396 switch (tun->flags & TUN_TYPE_MASK) {
13811397 case IFF_TUN:
13821398 dev->netdev_ops = &tun_netdev_ops;
1399
+ dev->header_ops = &ip_tunnel_header_ops;
13831400
13841401 /* Point-to-Point TUN Device */
13851402 dev->hard_header_len = 0;
....@@ -1429,8 +1446,6 @@
14291446
14301447 sk = tfile->socket.sk;
14311448
1432
- tun_debug(KERN_INFO, tun, "tun_chr_poll\n");
1433
-
14341449 poll_wait(file, sk_sleep(sk), wait);
14351450
14361451 if (!ptr_ring_empty(&tfile->tx_ring))
....@@ -1462,7 +1477,8 @@
14621477 int err;
14631478 int i;
14641479
1465
- if (it->nr_segs > MAX_SKB_FRAGS + 1)
1480
+ if (it->nr_segs > MAX_SKB_FRAGS + 1 ||
1481
+ len > (ETH_MAX_MTU - NET_SKB_PAD - NET_IP_ALIGN))
14661482 return ERR_PTR(-EMSGSIZE);
14671483
14681484 local_bh_disable();
....@@ -1481,23 +1497,22 @@
14811497 skb->truesize += skb->data_len;
14821498
14831499 for (i = 1; i < it->nr_segs; i++) {
1484
- struct page_frag *pfrag = &current->task_frag;
14851500 size_t fragsz = it->iov[i].iov_len;
1501
+ struct page *page;
1502
+ void *frag;
14861503
14871504 if (fragsz == 0 || fragsz > PAGE_SIZE) {
14881505 err = -EINVAL;
14891506 goto free;
14901507 }
1491
-
1492
- if (!skb_page_frag_refill(fragsz, pfrag, GFP_KERNEL)) {
1508
+ frag = netdev_alloc_frag(fragsz);
1509
+ if (!frag) {
14931510 err = -ENOMEM;
14941511 goto free;
14951512 }
1496
-
1497
- skb_fill_page_desc(skb, i - 1, pfrag->page,
1498
- pfrag->offset, fragsz);
1499
- page_ref_inc(pfrag->page);
1500
- pfrag->offset += fragsz;
1513
+ page = virt_to_head_page(frag);
1514
+ skb_fill_page_desc(skb, i - 1, page,
1515
+ frag - page_address(page), fragsz);
15011516 }
15021517
15031518 return skb;
....@@ -1589,11 +1604,62 @@
15891604 if (zerocopy)
15901605 return false;
15911606
1592
- if (SKB_DATA_ALIGN(len + TUN_RX_PAD) +
1607
+ if (SKB_DATA_ALIGN(len + TUN_RX_PAD + XDP_PACKET_HEADROOM) +
15931608 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) > PAGE_SIZE)
15941609 return false;
15951610
15961611 return true;
1612
+}
1613
+
1614
+static struct sk_buff *__tun_build_skb(struct tun_file *tfile,
1615
+ struct page_frag *alloc_frag, char *buf,
1616
+ int buflen, int len, int pad)
1617
+{
1618
+ struct sk_buff *skb = build_skb(buf, buflen);
1619
+
1620
+ if (!skb)
1621
+ return ERR_PTR(-ENOMEM);
1622
+
1623
+ skb_reserve(skb, pad);
1624
+ skb_put(skb, len);
1625
+ skb_set_owner_w(skb, tfile->socket.sk);
1626
+
1627
+ get_page(alloc_frag->page);
1628
+ alloc_frag->offset += buflen;
1629
+
1630
+ return skb;
1631
+}
1632
+
1633
+static int tun_xdp_act(struct tun_struct *tun, struct bpf_prog *xdp_prog,
1634
+ struct xdp_buff *xdp, u32 act)
1635
+{
1636
+ int err;
1637
+
1638
+ switch (act) {
1639
+ case XDP_REDIRECT:
1640
+ err = xdp_do_redirect(tun->dev, xdp, xdp_prog);
1641
+ if (err)
1642
+ return err;
1643
+ break;
1644
+ case XDP_TX:
1645
+ err = tun_xdp_tx(tun->dev, xdp);
1646
+ if (err < 0)
1647
+ return err;
1648
+ break;
1649
+ case XDP_PASS:
1650
+ break;
1651
+ default:
1652
+ bpf_warn_invalid_xdp_action(act);
1653
+ fallthrough;
1654
+ case XDP_ABORTED:
1655
+ trace_xdp_exception(tun->dev, xdp_prog, act);
1656
+ fallthrough;
1657
+ case XDP_DROP:
1658
+ this_cpu_inc(tun->pcpu_stats->rx_dropped);
1659
+ break;
1660
+ }
1661
+
1662
+ return act;
15971663 }
15981664
15991665 static struct sk_buff *tun_build_skb(struct tun_struct *tun,
....@@ -1603,18 +1669,17 @@
16031669 int len, int *skb_xdp)
16041670 {
16051671 struct page_frag *alloc_frag = &current->task_frag;
1606
- struct sk_buff *skb;
16071672 struct bpf_prog *xdp_prog;
16081673 int buflen = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1609
- unsigned int delta = 0;
16101674 char *buf;
16111675 size_t copied;
1612
- int err, pad = TUN_RX_PAD;
1676
+ int pad = TUN_RX_PAD;
1677
+ int err = 0;
16131678
16141679 rcu_read_lock();
16151680 xdp_prog = rcu_dereference(tun->xdp_prog);
16161681 if (xdp_prog)
1617
- pad += TUN_HEADROOM;
1682
+ pad += XDP_PACKET_HEADROOM;
16181683 buflen += SKB_DATA_ALIGN(len + pad);
16191684 rcu_read_unlock();
16201685
....@@ -1633,17 +1698,19 @@
16331698 * of xdp_prog above, this should be rare and for simplicity
16341699 * we do XDP on skb in case the headroom is not enough.
16351700 */
1636
- if (hdr->gso_type || !xdp_prog)
1701
+ if (hdr->gso_type || !xdp_prog) {
16371702 *skb_xdp = 1;
1638
- else
1639
- *skb_xdp = 0;
1703
+ return __tun_build_skb(tfile, alloc_frag, buf, buflen, len,
1704
+ pad);
1705
+ }
1706
+
1707
+ *skb_xdp = 0;
16401708
16411709 local_bh_disable();
16421710 rcu_read_lock();
16431711 xdp_prog = rcu_dereference(tun->xdp_prog);
1644
- if (xdp_prog && !*skb_xdp) {
1712
+ if (xdp_prog) {
16451713 struct xdp_buff xdp;
1646
- void *orig_data;
16471714 u32 act;
16481715
16491716 xdp.data_hard_start = buf;
....@@ -1651,67 +1718,36 @@
16511718 xdp_set_data_meta_invalid(&xdp);
16521719 xdp.data_end = xdp.data + len;
16531720 xdp.rxq = &tfile->xdp_rxq;
1654
- orig_data = xdp.data;
1721
+ xdp.frame_sz = buflen;
1722
+
16551723 act = bpf_prog_run_xdp(xdp_prog, &xdp);
1656
-
1657
- switch (act) {
1658
- case XDP_REDIRECT:
1724
+ if (act == XDP_REDIRECT || act == XDP_TX) {
16591725 get_page(alloc_frag->page);
16601726 alloc_frag->offset += buflen;
1661
- err = xdp_do_redirect(tun->dev, &xdp, xdp_prog);
1662
- xdp_do_flush_map();
1663
- if (err)
1664
- goto err_redirect;
1665
- rcu_read_unlock();
1666
- local_bh_enable();
1667
- return NULL;
1668
- case XDP_TX:
1669
- get_page(alloc_frag->page);
1670
- alloc_frag->offset += buflen;
1671
- if (tun_xdp_tx(tun->dev, &xdp) < 0)
1672
- goto err_redirect;
1673
- rcu_read_unlock();
1674
- local_bh_enable();
1675
- return NULL;
1676
- case XDP_PASS:
1677
- delta = orig_data - xdp.data;
1678
- len = xdp.data_end - xdp.data;
1679
- break;
1680
- default:
1681
- bpf_warn_invalid_xdp_action(act);
1682
- /* fall through */
1683
- case XDP_ABORTED:
1684
- trace_xdp_exception(tun->dev, xdp_prog, act);
1685
- /* fall through */
1686
- case XDP_DROP:
1687
- goto err_xdp;
16881727 }
1728
+ err = tun_xdp_act(tun, xdp_prog, &xdp, act);
1729
+ if (err < 0) {
1730
+ if (act == XDP_REDIRECT || act == XDP_TX)
1731
+ put_page(alloc_frag->page);
1732
+ goto out;
1733
+ }
1734
+
1735
+ if (err == XDP_REDIRECT)
1736
+ xdp_do_flush();
1737
+ if (err != XDP_PASS)
1738
+ goto out;
1739
+
1740
+ pad = xdp.data - xdp.data_hard_start;
1741
+ len = xdp.data_end - xdp.data;
16891742 }
1690
-
1691
- skb = build_skb(buf, buflen);
1692
- if (!skb) {
1693
- rcu_read_unlock();
1694
- local_bh_enable();
1695
- return ERR_PTR(-ENOMEM);
1696
- }
1697
-
1698
- skb_reserve(skb, pad - delta);
1699
- skb_put(skb, len);
1700
- skb_set_owner_w(skb, tfile->socket.sk);
1701
- get_page(alloc_frag->page);
1702
- alloc_frag->offset += buflen;
1703
-
17041743 rcu_read_unlock();
17051744 local_bh_enable();
17061745
1707
- return skb;
1746
+ return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, pad);
17081747
1709
-err_redirect:
1710
- put_page(alloc_frag->page);
1711
-err_xdp:
1748
+out:
17121749 rcu_read_unlock();
17131750 local_bh_enable();
1714
- this_cpu_inc(tun->pcpu_stats->rx_dropped);
17151751 return NULL;
17161752 }
17171753
....@@ -1902,7 +1938,8 @@
19021938 }
19031939
19041940 skb_reset_network_header(skb);
1905
- skb_probe_transport_header(skb, 0);
1941
+ skb_probe_transport_header(skb);
1942
+ skb_record_rx_queue(skb, tfile->queue_index);
19061943
19071944 if (skb_xdp) {
19081945 struct bpf_prog *xdp_prog;
....@@ -1947,20 +1984,29 @@
19471984
19481985 /* Exercise flow dissector code path. */
19491986 skb_push(skb, ETH_HLEN);
1950
- headlen = eth_get_headlen(skb->data, skb_headlen(skb));
1987
+ headlen = eth_get_headlen(tun->dev, skb->data,
1988
+ skb_headlen(skb));
19511989
19521990 if (unlikely(headlen > skb_headlen(skb))) {
1991
+ WARN_ON_ONCE(1);
1992
+ err = -ENOMEM;
19531993 this_cpu_inc(tun->pcpu_stats->rx_dropped);
1994
+napi_busy:
19541995 napi_free_frags(&tfile->napi);
19551996 rcu_read_unlock();
19561997 mutex_unlock(&tfile->napi_mutex);
1957
- WARN_ON(1);
1958
- return -ENOMEM;
1998
+ return err;
19591999 }
19602000
1961
- local_bh_disable();
1962
- napi_gro_frags(&tfile->napi);
1963
- local_bh_enable();
2001
+ if (likely(napi_schedule_prep(&tfile->napi))) {
2002
+ local_bh_disable();
2003
+ napi_gro_frags(&tfile->napi);
2004
+ napi_complete(&tfile->napi);
2005
+ local_bh_enable();
2006
+ } else {
2007
+ err = -EBUSY;
2008
+ goto napi_busy;
2009
+ }
19642010 mutex_unlock(&tfile->napi_mutex);
19652011 } else if (tfile->napi_enabled) {
19662012 struct sk_buff_head *queue = &tfile->sk.sk_write_queue;
....@@ -1984,8 +2030,8 @@
19842030
19852031 stats = get_cpu_ptr(tun->pcpu_stats);
19862032 u64_stats_update_begin(&stats->syncp);
1987
- stats->rx_packets++;
1988
- stats->rx_bytes += len;
2033
+ u64_stats_inc(&stats->rx_packets);
2034
+ u64_stats_add(&stats->rx_bytes, len);
19892035 u64_stats_update_end(&stats->syncp);
19902036 put_cpu_ptr(stats);
19912037
....@@ -2041,8 +2087,8 @@
20412087
20422088 stats = get_cpu_ptr(tun->pcpu_stats);
20432089 u64_stats_update_begin(&stats->syncp);
2044
- stats->tx_packets++;
2045
- stats->tx_bytes += ret;
2090
+ u64_stats_inc(&stats->tx_packets);
2091
+ u64_stats_add(&stats->tx_bytes, ret);
20462092 u64_stats_update_end(&stats->syncp);
20472093 put_cpu_ptr(tun->pcpu_stats);
20482094
....@@ -2136,8 +2182,8 @@
21362182 /* caller is in process context, */
21372183 stats = get_cpu_ptr(tun->pcpu_stats);
21382184 u64_stats_update_begin(&stats->syncp);
2139
- stats->tx_packets++;
2140
- stats->tx_bytes += skb->len + vlan_hlen;
2185
+ u64_stats_inc(&stats->tx_packets);
2186
+ u64_stats_add(&stats->tx_bytes, skb->len + vlan_hlen);
21412187 u64_stats_update_end(&stats->syncp);
21422188 put_cpu_ptr(tun->pcpu_stats);
21432189
....@@ -2158,7 +2204,7 @@
21582204 goto out;
21592205 }
21602206
2161
- add_wait_queue(&tfile->wq.wait, &wait);
2207
+ add_wait_queue(&tfile->socket.wq.wait, &wait);
21622208
21632209 while (1) {
21642210 set_current_state(TASK_INTERRUPTIBLE);
....@@ -2178,7 +2224,7 @@
21782224 }
21792225
21802226 __set_current_state(TASK_RUNNING);
2181
- remove_wait_queue(&tfile->wq.wait, &wait);
2227
+ remove_wait_queue(&tfile->socket.wq.wait, &wait);
21822228
21832229 out:
21842230 *err = error;
....@@ -2191,8 +2237,6 @@
21912237 {
21922238 ssize_t ret;
21932239 int err;
2194
-
2195
- tun_debug(KERN_INFO, tun, "tun_do_read\n");
21962240
21972241 if (!iov_iter_count(to)) {
21982242 tun_ptr_free(ptr);
....@@ -2284,7 +2328,9 @@
22842328 struct tun_struct *tun = netdev_priv(dev);
22852329
22862330 BUG_ON(!(list_empty(&tun->disabled)));
2331
+
22872332 free_percpu(tun->pcpu_stats);
2333
+
22882334 tun_flow_uninit(tun);
22892335 security_tun_dev_free_security(tun->security);
22902336 __tun_set_ebpf(tun, &tun->steering_prog, NULL);
....@@ -2400,18 +2446,160 @@
24002446 kill_fasync(&tfile->fasync, SIGIO, POLL_OUT);
24012447 }
24022448
2449
+static void tun_put_page(struct tun_page *tpage)
2450
+{
2451
+ if (tpage->page)
2452
+ __page_frag_cache_drain(tpage->page, tpage->count);
2453
+}
2454
+
2455
+static int tun_xdp_one(struct tun_struct *tun,
2456
+ struct tun_file *tfile,
2457
+ struct xdp_buff *xdp, int *flush,
2458
+ struct tun_page *tpage)
2459
+{
2460
+ unsigned int datasize = xdp->data_end - xdp->data;
2461
+ struct tun_xdp_hdr *hdr = xdp->data_hard_start;
2462
+ struct virtio_net_hdr *gso = &hdr->gso;
2463
+ struct tun_pcpu_stats *stats;
2464
+ struct bpf_prog *xdp_prog;
2465
+ struct sk_buff *skb = NULL;
2466
+ u32 rxhash = 0, act;
2467
+ int buflen = hdr->buflen;
2468
+ int err = 0;
2469
+ bool skb_xdp = false;
2470
+ struct page *page;
2471
+
2472
+ xdp_prog = rcu_dereference(tun->xdp_prog);
2473
+ if (xdp_prog) {
2474
+ if (gso->gso_type) {
2475
+ skb_xdp = true;
2476
+ goto build;
2477
+ }
2478
+ xdp_set_data_meta_invalid(xdp);
2479
+ xdp->rxq = &tfile->xdp_rxq;
2480
+ xdp->frame_sz = buflen;
2481
+
2482
+ act = bpf_prog_run_xdp(xdp_prog, xdp);
2483
+ err = tun_xdp_act(tun, xdp_prog, xdp, act);
2484
+ if (err < 0) {
2485
+ put_page(virt_to_head_page(xdp->data));
2486
+ return err;
2487
+ }
2488
+
2489
+ switch (err) {
2490
+ case XDP_REDIRECT:
2491
+ *flush = true;
2492
+ fallthrough;
2493
+ case XDP_TX:
2494
+ return 0;
2495
+ case XDP_PASS:
2496
+ break;
2497
+ default:
2498
+ page = virt_to_head_page(xdp->data);
2499
+ if (tpage->page == page) {
2500
+ ++tpage->count;
2501
+ } else {
2502
+ tun_put_page(tpage);
2503
+ tpage->page = page;
2504
+ tpage->count = 1;
2505
+ }
2506
+ return 0;
2507
+ }
2508
+ }
2509
+
2510
+build:
2511
+ skb = build_skb(xdp->data_hard_start, buflen);
2512
+ if (!skb) {
2513
+ err = -ENOMEM;
2514
+ goto out;
2515
+ }
2516
+
2517
+ skb_reserve(skb, xdp->data - xdp->data_hard_start);
2518
+ skb_put(skb, xdp->data_end - xdp->data);
2519
+
2520
+ if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun))) {
2521
+ this_cpu_inc(tun->pcpu_stats->rx_frame_errors);
2522
+ kfree_skb(skb);
2523
+ err = -EINVAL;
2524
+ goto out;
2525
+ }
2526
+
2527
+ skb->protocol = eth_type_trans(skb, tun->dev);
2528
+ skb_reset_network_header(skb);
2529
+ skb_probe_transport_header(skb);
2530
+ skb_record_rx_queue(skb, tfile->queue_index);
2531
+
2532
+ if (skb_xdp) {
2533
+ err = do_xdp_generic(xdp_prog, skb);
2534
+ if (err != XDP_PASS)
2535
+ goto out;
2536
+ }
2537
+
2538
+ if (!rcu_dereference(tun->steering_prog) && tun->numqueues > 1 &&
2539
+ !tfile->detached)
2540
+ rxhash = __skb_get_hash_symmetric(skb);
2541
+
2542
+ netif_receive_skb(skb);
2543
+
2544
+ /* No need for get_cpu_ptr() here since this function is
2545
+ * always called with bh disabled
2546
+ */
2547
+ stats = this_cpu_ptr(tun->pcpu_stats);
2548
+ u64_stats_update_begin(&stats->syncp);
2549
+ u64_stats_inc(&stats->rx_packets);
2550
+ u64_stats_add(&stats->rx_bytes, datasize);
2551
+ u64_stats_update_end(&stats->syncp);
2552
+
2553
+ if (rxhash)
2554
+ tun_flow_update(tun, rxhash, tfile);
2555
+
2556
+out:
2557
+ return err;
2558
+}
2559
+
/*
 * tun_sendmsg() - socket sendmsg handler that injects data into the tun device.
 *
 * @sock:      socket embedded in a struct tun_file (recovered via container_of)
 * @m:         message; msg_control may carry a struct tun_msg_ctl
 * @total_len: total length reported by the caller
 *
 * Two paths exist in the new ('+') version of this function:
 *  - If msg_controllen is exactly sizeof(struct tun_msg_ctl) and the control
 *    block has type TUN_MSG_PTR, ctl->ptr is treated as an array of ctl->num
 *    struct xdp_buff entries, each handed to tun_xdp_one(). This path always
 *    reports total_len as the result.
 *  - Otherwise the message is passed to tun_get_user(), with ctl->ptr (or NULL
 *    when there is no control block) as the third argument.
 *
 * Returns -EBADFD when no tun device is attached to the file; otherwise
 * total_len (batched path) or whatever tun_get_user() returns.
 * The reference taken by tun_get() is dropped with tun_put() on both paths.
 */
24032560 static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
24042561 {
2405
- int ret;
2562
+ int ret, i;
24062563 struct tun_file *tfile = container_of(sock, struct tun_file, socket);
24072564 struct tun_struct *tun = tun_get(tfile);
2565
+ struct tun_msg_ctl *ctl = m->msg_control;
2566
+ struct xdp_buff *xdp;
24082567
24092568 if (!tun)
24102569 return -EBADFD;
24112570
2412
- ret = tun_get_user(tun, tfile, m->msg_control, &m->msg_iter,
/* Batched path: only taken when the control data is exactly one tun_msg_ctl of type TUN_MSG_PTR. */
2571
+ if (m->msg_controllen == sizeof(struct tun_msg_ctl) &&
2572
+ ctl && ctl->type == TUN_MSG_PTR) {
2573
+ struct tun_page tpage;
2574
+ int n = ctl->num;
2575
+ int flush = 0;
2576
+
2577
+ memset(&tpage, 0, sizeof(tpage));
2578
+
/* tun_xdp_one() is run with bottom halves disabled and inside an RCU read-side section. */
2579
+ local_bh_disable();
2580
+ rcu_read_lock();
2581
+
/* The return value of tun_xdp_one() is not checked; per-buffer failures do not change ret. */
2582
+ for (i = 0; i < n; i++) {
2583
+ xdp = &((struct xdp_buff *)ctl->ptr)[i];
2584
+ tun_xdp_one(tun, tfile, xdp, &flush, &tpage);
2585
+ }
2586
+
/* flush is an out-parameter of tun_xdp_one(); a single flush is issued after the whole batch. */
2587
+ if (flush)
2588
+ xdp_do_flush();
2589
+
2590
+ rcu_read_unlock();
2591
+ local_bh_enable();
2592
+
2593
+ tun_put_page(&tpage);
2594
+
2595
+ ret = total_len;
2596
+ goto out;
2597
+ }
2598
+
/* Regular path: ctl may be NULL here, hence the guarded ctl->ptr access. */
2599
+ ret = tun_get_user(tun, tfile, ctl ? ctl->ptr : NULL, &m->msg_iter,
24132600 m->msg_flags & MSG_DONTWAIT,
24142601 m->msg_flags & MSG_MORE);
2602
+out:
24152603 tun_put(tun);
24162604 return ret;
24172605 }
....@@ -2636,9 +2824,6 @@
26362824
26372825 if (!dev)
26382826 return -ENOMEM;
2639
- err = dev_get_valid_name(net, dev, name);
2640
- if (err < 0)
2641
- goto err_free_dev;
26422827
26432828 dev_net_set(dev, net);
26442829 dev->rtnl_link_ops = &tun_link_ops;
....@@ -2657,41 +2842,16 @@
26572842 tun->rx_batched = 0;
26582843 RCU_INIT_POINTER(tun->steering_prog, NULL);
26592844
2660
- tun->pcpu_stats = netdev_alloc_pcpu_stats(struct tun_pcpu_stats);
2661
- if (!tun->pcpu_stats) {
2662
- err = -ENOMEM;
2663
- goto err_free_dev;
2664
- }
2845
+ tun->ifr = ifr;
2846
+ tun->file = file;
26652847
2666
- spin_lock_init(&tun->lock);
2667
-
2668
- err = security_tun_dev_alloc_security(&tun->security);
2669
- if (err < 0)
2670
- goto err_free_stat;
2671
-
2672
- tun_net_init(dev);
2673
- tun_flow_init(tun);
2674
-
2675
- dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST |
2676
- TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
2677
- NETIF_F_HW_VLAN_STAG_TX;
2678
- dev->features = dev->hw_features | NETIF_F_LLTX;
2679
- dev->vlan_features = dev->features &
2680
- ~(NETIF_F_HW_VLAN_CTAG_TX |
2681
- NETIF_F_HW_VLAN_STAG_TX);
2682
-
2683
- tun->flags = (tun->flags & ~TUN_FEATURES) |
2684
- (ifr->ifr_flags & TUN_FEATURES);
2685
-
2686
- INIT_LIST_HEAD(&tun->disabled);
2687
- err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI,
2688
- ifr->ifr_flags & IFF_NAPI_FRAGS, false);
2689
- if (err < 0)
2690
- goto err_free_flow;
2848
+ tun_net_initialize(dev);
26912849
26922850 err = register_netdevice(tun->dev);
2693
- if (err < 0)
2694
- goto err_detach;
2851
+ if (err < 0) {
2852
+ free_netdev(dev);
2853
+ return err;
2854
+ }
26952855 /* free_netdev() won't check refcnt, to avoid race
26962856 * with dev_put() we need to publish tun after registration.
26972857 */
....@@ -2699,8 +2859,6 @@
26992859 }
27002860
27012861 netif_carrier_on(tun->dev);
2702
-
2703
- tun_debug(KERN_INFO, tun, "tun_set_iff\n");
27042862
27052863 /* Make sure persistent devices do not get stuck in
27062864 * xoff state.
....@@ -2710,27 +2868,10 @@
27102868
27112869 strcpy(ifr->ifr_name, tun->dev->name);
27122870 return 0;
2713
-
2714
-err_detach:
2715
- tun_detach_all(dev);
2716
- /* register_netdevice() already called tun_free_netdev() */
2717
- goto err_free_dev;
2718
-
2719
-err_free_flow:
2720
- tun_flow_uninit(tun);
2721
- security_tun_dev_free_security(tun->security);
2722
-err_free_stat:
2723
- free_percpu(tun->pcpu_stats);
2724
-err_free_dev:
2725
- free_netdev(dev);
2726
- return err;
27272871 }
27282872
2729
-static void tun_get_iff(struct net *net, struct tun_struct *tun,
2730
- struct ifreq *ifr)
2873
+static void tun_get_iff(struct tun_struct *tun, struct ifreq *ifr)
27312874 {
2732
- tun_debug(KERN_INFO, tun, "tun_get_iff\n");
2733
-
27342875 strcpy(ifr->ifr_name, tun->dev->name);
27352876
27362877 ifr->ifr_flags = tun_flags(tun);
....@@ -2857,7 +2998,7 @@
28572998 return ret;
28582999 }
28593000
2860
-static int tun_set_ebpf(struct tun_struct *tun, struct tun_prog **prog_p,
3001
+static int tun_set_ebpf(struct tun_struct *tun, struct tun_prog __rcu **prog_p,
28613002 void __user *data)
28623003 {
28633004 struct bpf_prog *prog;
....@@ -2923,12 +3064,12 @@
29233064 struct net *net = sock_net(&tfile->sk);
29243065 struct tun_struct *tun;
29253066 void __user* argp = (void __user*)arg;
3067
+ unsigned int ifindex, carrier;
29263068 struct ifreq ifr;
29273069 kuid_t owner;
29283070 kgid_t group;
29293071 int sndbuf;
29303072 int vnet_hdr_sz;
2931
- unsigned int ifindex;
29323073 int le;
29333074 int ret;
29343075 bool do_notify = false;
....@@ -2993,12 +3134,13 @@
29933134 if (!tun)
29943135 goto unlock;
29953136
2996
- tun_debug(KERN_INFO, tun, "tun_chr_ioctl cmd %u\n", cmd);
3137
+ netif_info(tun, drv, tun->dev, "tun_chr_ioctl cmd %u\n", cmd);
29973138
3139
+ net = dev_net(tun->dev);
29983140 ret = 0;
29993141 switch (cmd) {
30003142 case TUNGETIFF:
3001
- tun_get_iff(current->nsproxy->net_ns, tun, &ifr);
3143
+ tun_get_iff(tun, &ifr);
30023144
30033145 if (tfile->detached)
30043146 ifr.ifr_flags |= IFF_DETACH_QUEUE;
....@@ -3013,8 +3155,8 @@
30133155 /* Disable/Enable checksum */
30143156
30153157 /* [unimplemented] */
3016
- tun_debug(KERN_INFO, tun, "ignored: set checksum %s\n",
3017
- arg ? "disabled" : "enabled");
3158
+ netif_info(tun, drv, tun->dev, "ignored: set checksum %s\n",
3159
+ arg ? "disabled" : "enabled");
30183160 break;
30193161
30203162 case TUNSETPERSIST:
....@@ -3032,8 +3174,8 @@
30323174 do_notify = true;
30333175 }
30343176
3035
- tun_debug(KERN_INFO, tun, "persist %s\n",
3036
- arg ? "enabled" : "disabled");
3177
+ netif_info(tun, drv, tun->dev, "persist %s\n",
3178
+ arg ? "enabled" : "disabled");
30373179 break;
30383180
30393181 case TUNSETOWNER:
....@@ -3045,8 +3187,8 @@
30453187 }
30463188 tun->owner = owner;
30473189 do_notify = true;
3048
- tun_debug(KERN_INFO, tun, "owner set to %u\n",
3049
- from_kuid(&init_user_ns, tun->owner));
3190
+ netif_info(tun, drv, tun->dev, "owner set to %u\n",
3191
+ from_kuid(&init_user_ns, tun->owner));
30503192 break;
30513193
30523194 case TUNSETGROUP:
....@@ -3058,30 +3200,29 @@
30583200 }
30593201 tun->group = group;
30603202 do_notify = true;
3061
- tun_debug(KERN_INFO, tun, "group set to %u\n",
3062
- from_kgid(&init_user_ns, tun->group));
3203
+ netif_info(tun, drv, tun->dev, "group set to %u\n",
3204
+ from_kgid(&init_user_ns, tun->group));
30633205 break;
30643206
30653207 case TUNSETLINK:
30663208 /* Only allow setting the type when the interface is down */
30673209 if (tun->dev->flags & IFF_UP) {
3068
- tun_debug(KERN_INFO, tun,
3069
- "Linktype set failed because interface is up\n");
3210
+ netif_info(tun, drv, tun->dev,
3211
+ "Linktype set failed because interface is up\n");
30703212 ret = -EBUSY;
30713213 } else {
30723214 tun->dev->type = (int) arg;
30733215 tun->dev->addr_len = tun_get_addr_len(tun->dev->type);
3074
- tun_debug(KERN_INFO, tun, "linktype set to %d\n",
3075
- tun->dev->type);
3216
+ netif_info(tun, drv, tun->dev, "linktype set to %d\n",
3217
+ tun->dev->type);
30763218 ret = 0;
30773219 }
30783220 break;
30793221
3080
-#ifdef TUN_DEBUG
30813222 case TUNSETDEBUG:
3082
- tun->debug = arg;
3223
+ tun->msg_enable = (u32)arg;
30833224 break;
3084
-#endif
3225
+
30853226 case TUNSETOFFLOAD:
30863227 ret = set_offload(tun, arg);
30873228 break;
....@@ -3096,18 +3237,14 @@
30963237
30973238 case SIOCGIFHWADDR:
30983239 /* Get hw address */
3099
- memcpy(ifr.ifr_hwaddr.sa_data, tun->dev->dev_addr, ETH_ALEN);
3100
- ifr.ifr_hwaddr.sa_family = tun->dev->type;
3240
+ dev_get_mac_address(&ifr.ifr_hwaddr, net, tun->dev->name);
31013241 if (copy_to_user(argp, &ifr, ifreq_len))
31023242 ret = -EFAULT;
31033243 break;
31043244
31053245 case SIOCSIFHWADDR:
31063246 /* Set hw address */
3107
- tun_debug(KERN_DEBUG, tun, "set hw address: %pM\n",
3108
- ifr.ifr_hwaddr.sa_data);
3109
-
3110
- ret = dev_set_mac_address(tun->dev, &ifr.ifr_hwaddr);
3247
+ ret = dev_set_mac_address_user(tun->dev, &ifr.ifr_hwaddr, NULL);
31113248 break;
31123249
31133250 case TUNGETSNDBUF:
....@@ -3213,6 +3350,21 @@
32133350 ret = tun_set_ebpf(tun, &tun->filter_prog, argp);
32143351 break;
32153352
3353
+ case TUNSETCARRIER:
3354
+ ret = -EFAULT;
3355
+ if (copy_from_user(&carrier, argp, sizeof(carrier)))
3356
+ goto unlock;
3357
+
3358
+ ret = tun_net_change_carrier(tun->dev, (bool)carrier);
3359
+ break;
3360
+
3361
+ case TUNGETDEVNETNS:
3362
+ ret = -EPERM;
3363
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
3364
+ goto unlock;
3365
+ ret = open_related_ns(&net->ns, get_net_ns);
3366
+ break;
3367
+
32163368 default:
32173369 ret = -EINVAL;
32183370 break;
....@@ -3286,8 +3438,6 @@
32863438 struct net *net = current->nsproxy->net_ns;
32873439 struct tun_file *tfile;
32883440
3289
- DBG1(KERN_INFO, "tunX: tun_chr_open\n");
3290
-
32913441 tfile = (struct tun_file *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL,
32923442 &tun_proto, 0);
32933443 if (!tfile)
....@@ -3302,13 +3452,12 @@
33023452 tfile->flags = 0;
33033453 tfile->ifindex = 0;
33043454
3305
- init_waitqueue_head(&tfile->wq.wait);
3306
- RCU_INIT_POINTER(tfile->socket.wq, &tfile->wq);
3455
+ init_waitqueue_head(&tfile->socket.wq.wait);
33073456
33083457 tfile->socket.file = file;
33093458 tfile->socket.ops = &tun_socket_ops;
33103459
3311
- sock_init_data(&tfile->socket, &tfile->sk);
3460
+ sock_init_data_uid(&tfile->socket, &tfile->sk, current_fsuid());
33123461
33133462 tfile->sk.sk_write_space = tun_sock_write_space;
33143463 tfile->sk.sk_sndbuf = INT_MAX;
....@@ -3342,7 +3491,7 @@
33423491 rtnl_lock();
33433492 tun = tun_get(tfile);
33443493 if (tun)
3345
- tun_get_iff(current->nsproxy->net_ns, tun, &ifr);
3494
+ tun_get_iff(tun, &ifr);
33463495 rtnl_unlock();
33473496
33483497 if (tun)
....@@ -3428,20 +3577,16 @@
34283577
/*
 * tun_get_msglevel() - return the device's message-level bitmask.
 *
 * Wired up as .get_msglevel in tun_ethtool_ops. The new ('+') version
 * unconditionally returns tun->msg_enable; the removed ('-') lines show the
 * previous variant, which returned tun->debug only under TUN_DEBUG and
 * -EOPNOTSUPP otherwise.
 */
34293578 static u32 tun_get_msglevel(struct net_device *dev)
34303579 {
3431
-#ifdef TUN_DEBUG
34323580 struct tun_struct *tun = netdev_priv(dev);
3433
- return tun->debug;
3434
-#else
3435
- return -EOPNOTSUPP;
3436
-#endif
3581
+
3582
+ return tun->msg_enable;
34373583 }
34383584
/*
 * tun_set_msglevel() - store a new message-level bitmask for the device.
 *
 * Wired up as .set_msglevel in tun_ethtool_ops. The new ('+') version always
 * writes @value to tun->msg_enable; the removed ('-') lines show the previous
 * variant, which wrote tun->debug and was a no-op without TUN_DEBUG.
 */
34393585 static void tun_set_msglevel(struct net_device *dev, u32 value)
34403586 {
3441
-#ifdef TUN_DEBUG
34423587 struct tun_struct *tun = netdev_priv(dev);
3443
- tun->debug = value;
3444
-#endif
3588
+
3589
+ tun->msg_enable = value;
34453590 }
34463591
34473592 static int tun_get_coalesce(struct net_device *dev,
....@@ -3468,6 +3613,7 @@
34683613 }
34693614
34703615 static const struct ethtool_ops tun_ethtool_ops = {
3616
+ .supported_coalesce_params = ETHTOOL_COALESCE_RX_MAX_FRAMES,
34713617 .get_drvinfo = tun_get_drvinfo,
34723618 .get_msglevel = tun_get_msglevel,
34733619 .set_msglevel = tun_set_msglevel,