hc
2023-12-11 6778948f9de86c3cfaf36725a7c87dcff9ba247f
kernel/net/packet/af_packet.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * INET An implementation of the TCP/IP protocol suite for the LINUX
34 * operating system. INET is implemented using the BSD Socket
....@@ -43,13 +44,6 @@
4344 * Chetan Loke : Implemented TPACKET_V3 block abstraction
4445 * layer.
4546 * Copyright (C) 2011, <lokec@ccs.neu.edu>
46
- *
47
- *
48
- * This program is free software; you can redistribute it and/or
49
- * modify it under the terms of the GNU General Public License
50
- * as published by the Free Software Foundation; either version
51
- * 2 of the License, or (at your option) any later version.
52
- *
5347 */
5448
5549 #include <linux/types.h>
....@@ -63,7 +57,6 @@
6357 #include <linux/if_packet.h>
6458 #include <linux/wireless.h>
6559 #include <linux/kernel.h>
66
-#include <linux/delay.h>
6760 #include <linux/kmod.h>
6861 #include <linux/slab.h>
6962 #include <linux/vmalloc.h>
....@@ -100,52 +93,56 @@
10093
10194 /*
10295 Assumptions:
103
- - if device has no dev->hard_header routine, it adds and removes ll header
104
- inside itself. In this case ll header is invisible outside of device,
105
- but higher levels still should reserve dev->hard_header_len.
106
- Some devices are enough clever to reallocate skb, when header
107
- will not fit to reserved space (tunnel), another ones are silly
108
- (PPP).
96
+ - If the device has no dev->header_ops->create, there is no LL header
97
+ visible above the device. In this case, its hard_header_len should be 0.
98
+ The device may prepend its own header internally. In this case, its
99
+ needed_headroom should be set to the space needed for it to add its
100
+ internal header.
101
+ For example, a WiFi driver pretending to be an Ethernet driver should
102
+ set its hard_header_len to be the Ethernet header length, and set its
103
+ needed_headroom to be (the real WiFi header length - the fake Ethernet
104
+ header length).
109105 - packet socket receives packets with pulled ll header,
110106 so that SOCK_RAW should push it back.
111107
112108 On receive:
113109 -----------
114110
115
-Incoming, dev->hard_header!=NULL
111
+Incoming, dev_has_header(dev) == true
116112 mac_header -> ll header
117113 data -> data
118114
119
-Outgoing, dev->hard_header!=NULL
115
+Outgoing, dev_has_header(dev) == true
120116 mac_header -> ll header
121117 data -> ll header
122118
123
-Incoming, dev->hard_header==NULL
124
- mac_header -> UNKNOWN position. It is very likely, that it points to ll
125
- header. PPP makes it, that is wrong, because introduce
126
- assymetry between rx and tx paths.
119
+Incoming, dev_has_header(dev) == false
120
+ mac_header -> data
121
+ However drivers often make it point to the ll header.
122
+ This is incorrect because the ll header should be invisible to us.
127123 data -> data
128124
129
-Outgoing, dev->hard_header==NULL
130
- mac_header -> data. ll header is still not built!
125
+Outgoing, dev_has_header(dev) == false
126
+ mac_header -> data. ll header is invisible to us.
131127 data -> data
132128
133129 Resume
134
- If dev->hard_header==NULL we are unlikely to restore sensible ll header.
130
+ If dev_has_header(dev) == false we are unable to restore the ll header,
131
+ because it is invisible to us.
135132
136133
137134 On transmit:
138135 ------------
139136
140
-dev->hard_header != NULL
137
+dev->header_ops != NULL
141138 mac_header -> ll header
142139 data -> ll header
143140
144
-dev->hard_header == NULL (ll header is added by device, we cannot control it)
141
+dev->header_ops == NULL (ll header is invisible to us)
145142 mac_header -> data
146143 data -> data
147144
148
- We should set nh.raw on output to correct posistion,
145
+ We should set network_header on output to the correct position,
149146 packet classifier depends on it.
150147 */
151148
....@@ -184,7 +181,6 @@
184181 #define BLOCK_LEN(x) ((x)->hdr.bh1.blk_len)
185182 #define BLOCK_SNUM(x) ((x)->hdr.bh1.seq_num)
186183 #define BLOCK_O2PRIV(x) ((x)->offset_to_priv)
187
-#define BLOCK_PRIV(x) ((void *)((char *)(x) + BLOCK_O2PRIV(x)))
188184
189185 struct packet_sock;
190186 static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
....@@ -276,24 +272,22 @@
276272 return po->xmit == packet_direct_xmit;
277273 }
278274
279
-static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb,
280
- struct net_device *sb_dev)
281
-{
282
- return dev_pick_tx_cpu_id(dev, skb, sb_dev, NULL);
283
-}
284
-
285275 static u16 packet_pick_tx_queue(struct sk_buff *skb)
286276 {
287277 struct net_device *dev = skb->dev;
288278 const struct net_device_ops *ops = dev->netdev_ops;
279
+ int cpu = raw_smp_processor_id();
289280 u16 queue_index;
290281
282
+#ifdef CONFIG_XPS
283
+ skb->sender_cpu = cpu + 1;
284
+#endif
285
+ skb_record_rx_queue(skb, cpu % dev->real_num_tx_queues);
291286 if (ops->ndo_select_queue) {
292
- queue_index = ops->ndo_select_queue(dev, skb, NULL,
293
- __packet_pick_tx_queue);
287
+ queue_index = ops->ndo_select_queue(dev, skb, NULL);
294288 queue_index = netdev_cap_txqueue(dev, queue_index);
295289 } else {
296
- queue_index = __packet_pick_tx_queue(dev, skb, NULL);
290
+ queue_index = netdev_pick_tx(dev, skb, NULL);
297291 }
298292
299293 return queue_index;
....@@ -393,7 +387,7 @@
393387 smp_wmb();
394388 }
395389
396
-static int __packet_get_status(struct packet_sock *po, void *frame)
390
+static int __packet_get_status(const struct packet_sock *po, void *frame)
397391 {
398392 union tpacket_uhdr h;
399393
....@@ -417,17 +411,18 @@
417411 }
418412 }
419413
420
-static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts,
414
+static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec64 *ts,
421415 unsigned int flags)
422416 {
423417 struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
424418
425419 if (shhwtstamps &&
426420 (flags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
427
- ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts))
421
+ ktime_to_timespec64_cond(shhwtstamps->hwtstamp, ts))
428422 return TP_STATUS_TS_RAW_HARDWARE;
429423
430
- if (ktime_to_timespec_cond(skb->tstamp, ts))
424
+ if ((flags & SOF_TIMESTAMPING_SOFTWARE) &&
425
+ ktime_to_timespec64_cond(skb->tstamp, ts))
431426 return TP_STATUS_TS_SOFTWARE;
432427
433428 return 0;
....@@ -437,13 +432,20 @@
437432 struct sk_buff *skb)
438433 {
439434 union tpacket_uhdr h;
440
- struct timespec ts;
435
+ struct timespec64 ts;
441436 __u32 ts_status;
442437
443438 if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
444439 return 0;
445440
446441 h.raw = frame;
442
+ /*
443
+ * versions 1 through 3 overflow the timestamps in y2106, since they
444
+ * all store the seconds in a 32-bit unsigned integer.
445
+ * If we create a version 4, that should have a 64-bit timestamp,
446
+ * either 64-bit seconds + 32-bit nanoseconds, or just 64-bit
447
+ * nanoseconds.
448
+ */
447449 switch (po->tp_version) {
448450 case TPACKET_V1:
449451 h.h1->tp_sec = ts.tv_sec;
....@@ -469,10 +471,10 @@
469471 return ts_status;
470472 }
471473
472
-static void *packet_lookup_frame(struct packet_sock *po,
473
- struct packet_ring_buffer *rb,
474
- unsigned int position,
475
- int status)
474
+static void *packet_lookup_frame(const struct packet_sock *po,
475
+ const struct packet_ring_buffer *rb,
476
+ unsigned int position,
477
+ int status)
476478 {
477479 unsigned int pg_vec_pos, frame_offset;
478480 union tpacket_uhdr h;
....@@ -529,7 +531,7 @@
529531 int blk_size_in_bytes)
530532 {
531533 struct net_device *dev;
532
- unsigned int mbits = 0, msec = 0, div = 0, tmo = 0;
534
+ unsigned int mbits, div;
533535 struct ethtool_link_ksettings ecmd;
534536 int err;
535537
....@@ -541,31 +543,25 @@
541543 }
542544 err = __ethtool_get_link_ksettings(dev, &ecmd);
543545 rtnl_unlock();
544
- if (!err) {
545
- /*
546
- * If the link speed is so slow you don't really
547
- * need to worry about perf anyways
548
- */
549
- if (ecmd.base.speed < SPEED_1000 ||
550
- ecmd.base.speed == SPEED_UNKNOWN) {
551
- return DEFAULT_PRB_RETIRE_TOV;
552
- } else {
553
- msec = 1;
554
- div = ecmd.base.speed / 1000;
555
- }
556
- } else
546
+ if (err)
557547 return DEFAULT_PRB_RETIRE_TOV;
558548
549
+ /* If the link speed is so slow you don't really
550
+ * need to worry about perf anyways
551
+ */
552
+ if (ecmd.base.speed < SPEED_1000 ||
553
+ ecmd.base.speed == SPEED_UNKNOWN)
554
+ return DEFAULT_PRB_RETIRE_TOV;
555
+
556
+ div = ecmd.base.speed / 1000;
559557 mbits = (blk_size_in_bytes * 8) / (1024 * 1024);
560558
561559 if (div)
562560 mbits /= div;
563561
564
- tmo = mbits * msec;
565
-
566562 if (div)
567
- return tmo+1;
568
- return tmo;
563
+ return mbits + 1;
564
+ return mbits;
569565 }
570566
571567 static void prb_init_ft_ops(struct tpacket_kbdq_core *p1,
....@@ -601,6 +597,7 @@
601597 req_u->req3.tp_block_size);
602598 p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov);
603599 p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv;
600
+ rwlock_init(&p1->blk_fill_in_prog_lock);
604601
605602 p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv);
606603 prb_init_ft_ops(p1, req_u);
....@@ -667,10 +664,9 @@
667664 *
668665 */
669666 if (BLOCK_NUM_PKTS(pbd)) {
670
- while (atomic_read(&pkc->blk_fill_in_prog)) {
671
- /* Waiting for skb_copy_bits to finish... */
672
- cpu_chill();
673
- }
667
+ /* Waiting for skb_copy_bits to finish... */
668
+ write_lock(&pkc->blk_fill_in_prog_lock);
669
+ write_unlock(&pkc->blk_fill_in_prog_lock);
674670 }
675671
676672 if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) {
....@@ -768,7 +764,7 @@
768764 struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
769765 struct sock *sk = &po->sk;
770766
771
- if (po->stats.stats3.tp_drops)
767
+ if (atomic_read(&po->tp_drops))
772768 status |= TP_STATUS_LOSING;
773769
774770 last_pkt = (struct tpacket3_hdr *)pkc1->prev;
....@@ -784,8 +780,8 @@
784780 * It shouldn't really happen as we don't close empty
785781 * blocks. See prb_retire_rx_blk_timer_expired().
786782 */
787
- struct timespec ts;
788
- getnstimeofday(&ts);
783
+ struct timespec64 ts;
784
+ ktime_get_real_ts64(&ts);
789785 h1->ts_last_pkt.ts_sec = ts.tv_sec;
790786 h1->ts_last_pkt.ts_nsec = ts.tv_nsec;
791787 }
....@@ -815,7 +811,7 @@
815811 static void prb_open_block(struct tpacket_kbdq_core *pkc1,
816812 struct tpacket_block_desc *pbd1)
817813 {
818
- struct timespec ts;
814
+ struct timespec64 ts;
819815 struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
820816
821817 smp_rmb();
....@@ -828,7 +824,7 @@
828824 BLOCK_NUM_PKTS(pbd1) = 0;
829825 BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
830826
831
- getnstimeofday(&ts);
827
+ ktime_get_real_ts64(&ts);
832828
833829 h1->ts_first_pkt.ts_sec = ts.tv_sec;
834830 h1->ts_first_pkt.ts_nsec = ts.tv_nsec;
....@@ -929,10 +925,9 @@
929925 * the timer-handler already handled this case.
930926 */
931927 if (!(status & TP_STATUS_BLK_TMO)) {
932
- while (atomic_read(&pkc->blk_fill_in_prog)) {
933
- /* Waiting for skb_copy_bits to finish... */
934
- cpu_chill();
935
- }
928
+ /* Waiting for skb_copy_bits to finish... */
929
+ write_lock(&pkc->blk_fill_in_prog_lock);
930
+ write_unlock(&pkc->blk_fill_in_prog_lock);
936931 }
937932 prb_close_block(pkc, pbd, po, status);
938933 return;
....@@ -953,7 +948,8 @@
953948 __releases(&pkc->blk_fill_in_prog_lock)
954949 {
955950 struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
956
- atomic_dec(&pkc->blk_fill_in_prog);
951
+
952
+ read_unlock(&pkc->blk_fill_in_prog_lock);
957953 }
958954
959955 static void prb_fill_rxhash(struct tpacket_kbdq_core *pkc,
....@@ -1008,14 +1004,13 @@
10081004 pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len);
10091005 BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len);
10101006 BLOCK_NUM_PKTS(pbd) += 1;
1011
- atomic_inc(&pkc->blk_fill_in_prog);
1007
+ read_lock(&pkc->blk_fill_in_prog_lock);
10121008 prb_run_all_ft_ops(pkc, ppd);
10131009 }
10141010
10151011 /* Assumes caller has the sk->rx_queue.lock */
10161012 static void *__packet_lookup_frame_in_block(struct packet_sock *po,
10171013 struct sk_buff *skb,
1018
- int status,
10191014 unsigned int len
10201015 )
10211016 {
....@@ -1087,7 +1082,7 @@
10871082 po->rx_ring.head, status);
10881083 return curr;
10891084 case TPACKET_V3:
1090
- return __packet_lookup_frame_in_block(po, skb, status, len);
1085
+ return __packet_lookup_frame_in_block(po, skb, len);
10911086 default:
10921087 WARN(1, "TPACKET version not supported\n");
10931088 BUG();
....@@ -1095,10 +1090,10 @@
10951090 }
10961091 }
10971092
1098
-static void *prb_lookup_block(struct packet_sock *po,
1099
- struct packet_ring_buffer *rb,
1100
- unsigned int idx,
1101
- int status)
1093
+static void *prb_lookup_block(const struct packet_sock *po,
1094
+ const struct packet_ring_buffer *rb,
1095
+ unsigned int idx,
1096
+ int status)
11021097 {
11031098 struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
11041099 struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, idx);
....@@ -1211,12 +1206,12 @@
12111206 #define ROOM_LOW 0x1
12121207 #define ROOM_NORMAL 0x2
12131208
1214
-static bool __tpacket_has_room(struct packet_sock *po, int pow_off)
1209
+static bool __tpacket_has_room(const struct packet_sock *po, int pow_off)
12151210 {
12161211 int idx, len;
12171212
1218
- len = po->rx_ring.frame_max + 1;
1219
- idx = po->rx_ring.head;
1213
+ len = READ_ONCE(po->rx_ring.frame_max) + 1;
1214
+ idx = READ_ONCE(po->rx_ring.head);
12201215 if (pow_off)
12211216 idx += len >> pow_off;
12221217 if (idx >= len)
....@@ -1224,12 +1219,12 @@
12241219 return packet_lookup_frame(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
12251220 }
12261221
1227
-static bool __tpacket_v3_has_room(struct packet_sock *po, int pow_off)
1222
+static bool __tpacket_v3_has_room(const struct packet_sock *po, int pow_off)
12281223 {
12291224 int idx, len;
12301225
1231
- len = po->rx_ring.prb_bdqc.knum_blocks;
1232
- idx = po->rx_ring.prb_bdqc.kactive_blk_num;
1226
+ len = READ_ONCE(po->rx_ring.prb_bdqc.knum_blocks);
1227
+ idx = READ_ONCE(po->rx_ring.prb_bdqc.kactive_blk_num);
12331228 if (pow_off)
12341229 idx += len >> pow_off;
12351230 if (idx >= len)
....@@ -1237,15 +1232,18 @@
12371232 return prb_lookup_block(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
12381233 }
12391234
1240
-static int __packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
1235
+static int __packet_rcv_has_room(const struct packet_sock *po,
1236
+ const struct sk_buff *skb)
12411237 {
1242
- struct sock *sk = &po->sk;
1238
+ const struct sock *sk = &po->sk;
12431239 int ret = ROOM_NONE;
12441240
12451241 if (po->prot_hook.func != tpacket_rcv) {
1246
- int avail = sk->sk_rcvbuf - atomic_read(&sk->sk_rmem_alloc)
1247
- - (skb ? skb->truesize : 0);
1248
- if (avail > (sk->sk_rcvbuf >> ROOM_POW_OFF))
1242
+ int rcvbuf = READ_ONCE(sk->sk_rcvbuf);
1243
+ int avail = rcvbuf - atomic_read(&sk->sk_rmem_alloc)
1244
+ - (skb ? skb->truesize : 0);
1245
+
1246
+ if (avail > (rcvbuf >> ROOM_POW_OFF))
12491247 return ROOM_NORMAL;
12501248 else if (avail > 0)
12511249 return ROOM_LOW;
....@@ -1270,17 +1268,22 @@
12701268
12711269 static int packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
12721270 {
1273
- int ret;
1274
- bool has_room;
1271
+ int pressure, ret;
12751272
1276
- spin_lock_bh(&po->sk.sk_receive_queue.lock);
12771273 ret = __packet_rcv_has_room(po, skb);
1278
- has_room = ret == ROOM_NORMAL;
1279
- if (po->pressure == has_room)
1280
- po->pressure = !has_room;
1281
- spin_unlock_bh(&po->sk.sk_receive_queue.lock);
1274
+ pressure = ret != ROOM_NORMAL;
1275
+
1276
+ if (READ_ONCE(po->pressure) != pressure)
1277
+ WRITE_ONCE(po->pressure, pressure);
12821278
12831279 return ret;
1280
+}
1281
+
1282
+static void packet_rcv_try_clear_pressure(struct packet_sock *po)
1283
+{
1284
+ if (READ_ONCE(po->pressure) &&
1285
+ __packet_rcv_has_room(po, NULL) == ROOM_NORMAL)
1286
+ WRITE_ONCE(po->pressure, 0);
12841287 }
12851288
12861289 static void packet_sock_destruct(struct sock *sk)
....@@ -1356,7 +1359,7 @@
13561359 struct packet_sock *po, *po_next, *po_skip = NULL;
13571360 unsigned int i, j, room = ROOM_NONE;
13581361
1359
- po = pkt_sk(f->arr[idx]);
1362
+ po = pkt_sk(rcu_dereference(f->arr[idx]));
13601363
13611364 if (try_self) {
13621365 room = packet_rcv_has_room(po, skb);
....@@ -1368,8 +1371,8 @@
13681371
13691372 i = j = min_t(int, po->rollover->sock, num - 1);
13701373 do {
1371
- po_next = pkt_sk(f->arr[i]);
1372
- if (po_next != po_skip && !po_next->pressure &&
1374
+ po_next = pkt_sk(rcu_dereference(f->arr[i]));
1375
+ if (po_next != po_skip && !READ_ONCE(po_next->pressure) &&
13731376 packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) {
13741377 if (i != j)
13751378 po->rollover->sock = i;
....@@ -1463,7 +1466,7 @@
14631466 if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER))
14641467 idx = fanout_demux_rollover(f, skb, idx, true, num);
14651468
1466
- po = pkt_sk(f->arr[idx]);
1469
+ po = pkt_sk(rcu_dereference(f->arr[idx]));
14671470 return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev);
14681471 }
14691472
....@@ -1477,7 +1480,7 @@
14771480 struct packet_fanout *f = po->fanout;
14781481
14791482 spin_lock(&f->lock);
1480
- f->arr[f->num_members] = sk;
1483
+ rcu_assign_pointer(f->arr[f->num_members], sk);
14811484 smp_wmb();
14821485 f->num_members++;
14831486 if (f->num_members == 1)
....@@ -1492,11 +1495,14 @@
14921495
14931496 spin_lock(&f->lock);
14941497 for (i = 0; i < f->num_members; i++) {
1495
- if (f->arr[i] == sk)
1498
+ if (rcu_dereference_protected(f->arr[i],
1499
+ lockdep_is_held(&f->lock)) == sk)
14961500 break;
14971501 }
14981502 BUG_ON(i >= f->num_members);
1499
- f->arr[i] = f->arr[f->num_members - 1];
1503
+ rcu_assign_pointer(f->arr[i],
1504
+ rcu_dereference_protected(f->arr[f->num_members - 1],
1505
+ lockdep_is_held(&f->lock)));
15001506 f->num_members--;
15011507 if (f->num_members == 0)
15021508 __dev_remove_pack(&f->prot_hook);
....@@ -1539,7 +1545,7 @@
15391545 }
15401546 }
15411547
1542
-static int fanout_set_data_cbpf(struct packet_sock *po, char __user *data,
1548
+static int fanout_set_data_cbpf(struct packet_sock *po, sockptr_t data,
15431549 unsigned int len)
15441550 {
15451551 struct bpf_prog *new;
....@@ -1548,10 +1554,10 @@
15481554
15491555 if (sock_flag(&po->sk, SOCK_FILTER_LOCKED))
15501556 return -EPERM;
1551
- if (len != sizeof(fprog))
1552
- return -EINVAL;
1553
- if (copy_from_user(&fprog, data, len))
1554
- return -EFAULT;
1557
+
1558
+ ret = copy_bpf_fprog_from_user(&fprog, data, len);
1559
+ if (ret)
1560
+ return ret;
15551561
15561562 ret = bpf_prog_create_from_user(&new, &fprog, NULL, false);
15571563 if (ret)
....@@ -1561,7 +1567,7 @@
15611567 return 0;
15621568 }
15631569
1564
-static int fanout_set_data_ebpf(struct packet_sock *po, char __user *data,
1570
+static int fanout_set_data_ebpf(struct packet_sock *po, sockptr_t data,
15651571 unsigned int len)
15661572 {
15671573 struct bpf_prog *new;
....@@ -1571,7 +1577,7 @@
15711577 return -EPERM;
15721578 if (len != sizeof(fd))
15731579 return -EINVAL;
1574
- if (copy_from_user(&fd, data, len))
1580
+ if (copy_from_sockptr(&fd, data, len))
15751581 return -EFAULT;
15761582
15771583 new = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
....@@ -1582,7 +1588,7 @@
15821588 return 0;
15831589 }
15841590
1585
-static int fanout_set_data(struct packet_sock *po, char __user *data,
1591
+static int fanout_set_data(struct packet_sock *po, sockptr_t data,
15861592 unsigned int len)
15871593 {
15881594 switch (po->fanout->type) {
....@@ -1634,13 +1640,15 @@
16341640 return false;
16351641 }
16361642
1637
-static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
1643
+static int fanout_add(struct sock *sk, struct fanout_args *args)
16381644 {
16391645 struct packet_rollover *rollover = NULL;
16401646 struct packet_sock *po = pkt_sk(sk);
1647
+ u16 type_flags = args->type_flags;
16411648 struct packet_fanout *f, *match;
16421649 u8 type = type_flags & 0xff;
16431650 u8 flags = type_flags >> 8;
1651
+ u16 id = args->id;
16441652 int err;
16451653
16461654 switch (type) {
....@@ -1698,11 +1706,21 @@
16981706 }
16991707 }
17001708 err = -EINVAL;
1701
- if (match && match->flags != flags)
1702
- goto out;
1703
- if (!match) {
1709
+ if (match) {
1710
+ if (match->flags != flags)
1711
+ goto out;
1712
+ if (args->max_num_members &&
1713
+ args->max_num_members != match->max_num_members)
1714
+ goto out;
1715
+ } else {
1716
+ if (args->max_num_members > PACKET_FANOUT_MAX)
1717
+ goto out;
1718
+ if (!args->max_num_members)
1719
+ /* legacy PACKET_FANOUT_MAX */
1720
+ args->max_num_members = 256;
17041721 err = -ENOMEM;
1705
- match = kzalloc(sizeof(*match), GFP_KERNEL);
1722
+ match = kvzalloc(struct_size(match, arr, args->max_num_members),
1723
+ GFP_KERNEL);
17061724 if (!match)
17071725 goto out;
17081726 write_pnet(&match->net, sock_net(sk));
....@@ -1719,6 +1737,7 @@
17191737 match->prot_hook.af_packet_priv = match;
17201738 match->prot_hook.af_packet_net = read_pnet(&match->net);
17211739 match->prot_hook.id_match = match_fanout_group;
1740
+ match->max_num_members = args->max_num_members;
17221741 list_add(&match->list, &fanout_list);
17231742 }
17241743 err = -EINVAL;
....@@ -1729,7 +1748,7 @@
17291748 match->prot_hook.type == po->prot_hook.type &&
17301749 match->prot_hook.dev == po->prot_hook.dev) {
17311750 err = -ENOSPC;
1732
- if (refcount_read(&match->sk_ref) < PACKET_FANOUT_MAX) {
1751
+ if (refcount_read(&match->sk_ref) < match->max_num_members) {
17331752 __dev_remove_pack(&po->prot_hook);
17341753
17351754 /* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */
....@@ -1746,7 +1765,7 @@
17461765
17471766 if (err && !refcount_read(&match->sk_ref)) {
17481767 list_del(&match->list);
1749
- kfree(match);
1768
+ kvfree(match);
17501769 }
17511770
17521771 out:
....@@ -1836,7 +1855,7 @@
18361855 skb_dst_drop(skb);
18371856
18381857 /* drop conntrack reference */
1839
- nf_reset(skb);
1858
+ nf_reset_ct(skb);
18401859
18411860 spkt = &PACKET_SKB_CB(skb)->sa.pkt;
18421861
....@@ -1864,6 +1883,16 @@
18641883 return 0;
18651884 }
18661885
1886
+static void packet_parse_headers(struct sk_buff *skb, struct socket *sock)
1887
+{
1888
+ if ((!skb->protocol || skb->protocol == htons(ETH_P_ALL)) &&
1889
+ sock->type == SOCK_RAW) {
1890
+ skb_reset_mac_header(skb);
1891
+ skb->protocol = dev_parse_header_protocol(skb);
1892
+ }
1893
+
1894
+ skb_probe_transport_header(skb);
1895
+}
18671896
18681897 /*
18691898 * Output a raw packet to a device layer. This bypasses all the other
....@@ -1979,12 +2008,12 @@
19792008 skb->mark = sk->sk_mark;
19802009 skb->tstamp = sockc.transmit_time;
19812010
1982
- sock_tx_timestamp(sk, sockc.tsflags, &skb_shinfo(skb)->tx_flags);
2011
+ skb_setup_tx_timestamp(skb, sockc.tsflags);
19832012
19842013 if (unlikely(extra_len == 4))
19852014 skb->no_fcs = 1;
19862015
1987
- skb_probe_transport_header(skb, 0);
2016
+ packet_parse_headers(skb, sock);
19882017
19892018 dev_queue_xmit(skb);
19902019 rcu_read_unlock();
....@@ -2061,7 +2090,7 @@
20612090
20622091 skb->dev = dev;
20632092
2064
- if (dev->header_ops) {
2093
+ if (dev_has_header(dev)) {
20652094 /* The device has an explicit notion of ll header,
20662095 * exported to higher levels.
20672096 *
....@@ -2126,7 +2155,7 @@
21262155 skb_dst_drop(skb);
21272156
21282157 /* drop conntrack reference */
2129
- nf_reset(skb);
2158
+ nf_reset_ct(skb);
21302159
21312160 spin_lock(&sk->sk_receive_queue.lock);
21322161 po->stats.stats1.tp_packets++;
....@@ -2138,10 +2167,8 @@
21382167
21392168 drop_n_acct:
21402169 is_drop_n_account = true;
2141
- spin_lock(&sk->sk_receive_queue.lock);
2142
- po->stats.stats1.tp_drops++;
2170
+ atomic_inc(&po->tp_drops);
21432171 atomic_inc(&sk->sk_drops);
2144
- spin_unlock(&sk->sk_receive_queue.lock);
21452172
21462173 drop_n_restore:
21472174 if (skb_head != skb->data && skb_shared(skb)) {
....@@ -2170,7 +2197,7 @@
21702197 unsigned short macoff, hdrlen;
21712198 unsigned int netoff;
21722199 struct sk_buff *copy_skb = NULL;
2173
- struct timespec ts;
2200
+ struct timespec64 ts;
21742201 __u32 ts_status;
21752202 bool is_drop_n_account = false;
21762203 unsigned int slot_id = 0;
....@@ -2192,7 +2219,7 @@
21922219 if (!net_eq(dev_net(dev), sock_net(sk)))
21932220 goto drop;
21942221
2195
- if (dev->header_ops) {
2222
+ if (dev_has_header(dev)) {
21962223 if (sk->sk_type != SOCK_DGRAM)
21972224 skb_push(skb, skb->data - skb_mac_header(skb));
21982225 else if (skb->pkt_type == PACKET_OUTGOING) {
....@@ -2207,11 +2234,16 @@
22072234 if (!res)
22082235 goto drop_n_restore;
22092236
2237
+ /* If we are flooded, just give up */
2238
+ if (__packet_rcv_has_room(po, skb) == ROOM_NONE) {
2239
+ atomic_inc(&po->tp_drops);
2240
+ goto drop_n_restore;
2241
+ }
2242
+
22102243 if (skb->ip_summed == CHECKSUM_PARTIAL)
22112244 status |= TP_STATUS_CSUMNOTREADY;
22122245 else if (skb->pkt_type != PACKET_OUTGOING &&
2213
- (skb->ip_summed == CHECKSUM_COMPLETE ||
2214
- skb_csum_unnecessary(skb)))
2246
+ skb_csum_unnecessary(skb))
22152247 status |= TP_STATUS_CSUM_VALID;
22162248
22172249 if (snaplen > res)
....@@ -2232,9 +2264,7 @@
22322264 macoff = netoff - maclen;
22332265 }
22342266 if (netoff > USHRT_MAX) {
2235
- spin_lock(&sk->sk_receive_queue.lock);
2236
- po->stats.stats1.tp_drops++;
2237
- spin_unlock(&sk->sk_receive_queue.lock);
2267
+ atomic_inc(&po->tp_drops);
22382268 goto drop_n_restore;
22392269 }
22402270 if (po->tp_version <= TPACKET_V2) {
....@@ -2247,8 +2277,11 @@
22472277 copy_skb = skb_get(skb);
22482278 skb_head = skb->data;
22492279 }
2250
- if (copy_skb)
2280
+ if (copy_skb) {
2281
+ memset(&PACKET_SKB_CB(copy_skb)->sa.ll, 0,
2282
+ sizeof(PACKET_SKB_CB(copy_skb)->sa.ll));
22512283 skb_set_owner_r(copy_skb, sk);
2284
+ }
22522285 }
22532286 snaplen = po->rx_ring.frame_size - macoff;
22542287 if ((int)snaplen < 0) {
....@@ -2300,7 +2333,7 @@
23002333 * Anyways, moving it for V1/V2 only as V3 doesn't need this
23012334 * at packet level.
23022335 */
2303
- if (po->stats.stats1.tp_drops)
2336
+ if (atomic_read(&po->tp_drops))
23042337 status |= TP_STATUS_LOSING;
23052338 }
23062339
....@@ -2313,8 +2346,13 @@
23132346
23142347 skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
23152348
2316
- if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
2317
- getnstimeofday(&ts);
2349
+ /* Always timestamp; prefer an existing software timestamp taken
2350
+ * closer to the time of capture.
2351
+ */
2352
+ ts_status = tpacket_get_timestamp(skb, &ts,
2353
+ po->tp_tstamp | SOF_TIMESTAMPING_SOFTWARE);
2354
+ if (!ts_status)
2355
+ ktime_get_real_ts64(&ts);
23182356
23192357 status |= ts_status;
23202358
....@@ -2413,9 +2451,9 @@
24132451 return 0;
24142452
24152453 drop_n_account:
2416
- is_drop_n_account = true;
2417
- po->stats.stats1.tp_drops++;
24182454 spin_unlock(&sk->sk_receive_queue.lock);
2455
+ atomic_inc(&po->tp_drops);
2456
+ is_drop_n_account = true;
24192457
24202458 sk->sk_data_ready(sk);
24212459 kfree_skb(copy_skb);
....@@ -2441,15 +2479,6 @@
24412479 }
24422480
24432481 sock_wfree(skb);
2444
-}
2445
-
2446
-static void tpacket_set_protocol(const struct net_device *dev,
2447
- struct sk_buff *skb)
2448
-{
2449
- if (dev->type == ARPHRD_ETHER) {
2450
- skb_reset_mac_header(skb);
2451
- skb->protocol = eth_hdr(skb)->h_proto;
2452
- }
24532482 }
24542483
24552484 static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len)
....@@ -2499,7 +2528,7 @@
24992528 skb->priority = po->sk.sk_priority;
25002529 skb->mark = po->sk.sk_mark;
25012530 skb->tstamp = sockc->transmit_time;
2502
- sock_tx_timestamp(&po->sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags);
2531
+ skb_setup_tx_timestamp(skb, sockc->tsflags);
25032532 skb_zcopy_set_nouarg(skb, ph.raw);
25042533
25052534 skb_reserve(skb, hlen);
....@@ -2522,8 +2551,6 @@
25222551 return err;
25232552 if (!dev_validate_header(dev, skb->data, hdrlen))
25242553 return -EINVAL;
2525
- if (!skb->protocol)
2526
- tpacket_set_protocol(dev, skb);
25272554
25282555 data += hdrlen;
25292556 to_write -= hdrlen;
....@@ -2558,7 +2585,7 @@
25582585 len = ((to_write > len_max) ? len_max : to_write);
25592586 }
25602587
2561
- skb_probe_transport_header(skb, 0);
2588
+ packet_parse_headers(skb, sock);
25622589
25632590 return tp_len;
25642591 }
....@@ -2789,8 +2816,9 @@
27892816
27902817 status = TP_STATUS_SEND_REQUEST;
27912818 err = po->xmit(skb);
2792
- if (unlikely(err > 0)) {
2793
- err = net_xmit_errno(err);
2819
+ if (unlikely(err != 0)) {
2820
+ if (err > 0)
2821
+ err = net_xmit_errno(err);
27942822 if (err && __packet_get_status(po, ph) ==
27952823 TP_STATUS_AVAILABLE) {
27962824 /* skb was destructed already */
....@@ -2957,13 +2985,13 @@
29572985 if (err)
29582986 goto out_free;
29592987
2960
- if (sock->type == SOCK_RAW &&
2961
- !dev_validate_header(dev, skb->data, len)) {
2988
+ if ((sock->type == SOCK_RAW &&
2989
+ !dev_validate_header(dev, skb->data, len)) || !skb->len) {
29622990 err = -EINVAL;
29632991 goto out_free;
29642992 }
29652993
2966
- sock_tx_timestamp(sk, sockc.tsflags, &skb_shinfo(skb)->tx_flags);
2994
+ skb_setup_tx_timestamp(skb, sockc.tsflags);
29672995
29682996 if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) &&
29692997 !packet_extra_vlan_len_allowed(dev, skb)) {
....@@ -2985,14 +3013,18 @@
29853013 virtio_net_hdr_set_proto(skb, &vnet_hdr);
29863014 }
29873015
2988
- skb_probe_transport_header(skb, reserve);
3016
+ packet_parse_headers(skb, sock);
29893017
29903018 if (unlikely(extra_len == 4))
29913019 skb->no_fcs = 1;
29923020
29933021 err = po->xmit(skb);
2994
- if (err > 0 && (err = net_xmit_errno(err)) != 0)
2995
- goto out_unlock;
3022
+ if (unlikely(err != 0)) {
3023
+ if (err > 0)
3024
+ err = net_xmit_errno(err);
3025
+ if (err)
3026
+ goto out_unlock;
3027
+ }
29963028
29973029 dev_put(dev);
29983030
....@@ -3012,10 +3044,13 @@
30123044 struct sock *sk = sock->sk;
30133045 struct packet_sock *po = pkt_sk(sk);
30143046
3015
- if (po->tx_ring.pg_vec)
3047
+ /* Reading tx_ring.pg_vec without holding pg_vec_lock is racy.
3048
+ * tpacket_snd() will redo the check safely.
3049
+ */
3050
+ if (data_race(po->tx_ring.pg_vec))
30163051 return tpacket_snd(po, msg);
3017
- else
3018
- return packet_snd(sock, msg, len);
3052
+
3053
+ return packet_snd(sock, msg, len);
30193054 }
30203055
30213056 /*
....@@ -3076,7 +3111,7 @@
30763111 kfree(po->rollover);
30773112 if (f) {
30783113 fanout_release_data(f);
3079
- kfree(f);
3114
+ kvfree(f);
30803115 }
30813116 /*
30823117 * Now the socket is dead. No more input will appear.
....@@ -3371,8 +3406,7 @@
33713406 if (skb == NULL)
33723407 goto out;
33733408
3374
- if (pkt_sk(sk)->pressure)
3375
- packet_rcv_has_room(pkt_sk(sk), NULL);
3409
+ packet_rcv_try_clear_pressure(pkt_sk(sk));
33763410
33773411 if (pkt_sk(sk)->has_vnet_hdr) {
33783412 err = packet_rcv_vnet(msg, skb, &len);
....@@ -3407,6 +3441,8 @@
34073441 sock_recv_ts_and_drops(msg, sk, skb);
34083442
34093443 if (msg->msg_name) {
3444
+ const size_t max_len = min(sizeof(skb->cb),
3445
+ sizeof(struct sockaddr_storage));
34103446 int copy_len;
34113447
34123448 /* If the address length field is there to be filled
....@@ -3429,6 +3465,10 @@
34293465 msg->msg_namelen = sizeof(struct sockaddr_ll);
34303466 }
34313467 }
3468
+ if (WARN_ON_ONCE(copy_len > max_len)) {
3469
+ copy_len = max_len;
3470
+ msg->msg_namelen = copy_len;
3471
+ }
34323472 memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len);
34333473 }
34343474
....@@ -3439,8 +3479,7 @@
34393479 if (skb->ip_summed == CHECKSUM_PARTIAL)
34403480 aux.tp_status |= TP_STATUS_CSUMNOTREADY;
34413481 else if (skb->pkt_type != PACKET_OUTGOING &&
3442
- (skb->ip_summed == CHECKSUM_COMPLETE ||
3443
- skb_csum_unnecessary(skb)))
3482
+ skb_csum_unnecessary(skb))
34443483 aux.tp_status |= TP_STATUS_CSUM_VALID;
34453484
34463485 aux.tp_len = origlen;
....@@ -3670,7 +3709,8 @@
36703709 }
36713710
36723711 static int
3673
-packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
3712
+packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
3713
+ unsigned int optlen)
36743714 {
36753715 struct sock *sk = sock->sk;
36763716 struct packet_sock *po = pkt_sk(sk);
....@@ -3690,7 +3730,7 @@
36903730 return -EINVAL;
36913731 if (len > sizeof(mreq))
36923732 len = sizeof(mreq);
3693
- if (copy_from_user(&mreq, optval, len))
3733
+ if (copy_from_sockptr(&mreq, optval, len))
36943734 return -EFAULT;
36953735 if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
36963736 return -EINVAL;
....@@ -3721,7 +3761,7 @@
37213761 if (optlen < len) {
37223762 ret = -EINVAL;
37233763 } else {
3724
- if (copy_from_user(&req_u.req, optval, len))
3764
+ if (copy_from_sockptr(&req_u.req, optval, len))
37253765 ret = -EFAULT;
37263766 else
37273767 ret = packet_set_ring(sk, &req_u, 0,
....@@ -3736,7 +3776,7 @@
37363776
37373777 if (optlen != sizeof(val))
37383778 return -EINVAL;
3739
- if (copy_from_user(&val, optval, sizeof(val)))
3779
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
37403780 return -EFAULT;
37413781
37423782 pkt_sk(sk)->copy_thresh = val;
....@@ -3748,7 +3788,7 @@
37483788
37493789 if (optlen != sizeof(val))
37503790 return -EINVAL;
3751
- if (copy_from_user(&val, optval, sizeof(val)))
3791
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
37523792 return -EFAULT;
37533793 switch (val) {
37543794 case TPACKET_V1:
....@@ -3774,7 +3814,7 @@
37743814
37753815 if (optlen != sizeof(val))
37763816 return -EINVAL;
3777
- if (copy_from_user(&val, optval, sizeof(val)))
3817
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
37783818 return -EFAULT;
37793819 if (val > INT_MAX)
37803820 return -EINVAL;
....@@ -3794,7 +3834,7 @@
37943834
37953835 if (optlen != sizeof(val))
37963836 return -EINVAL;
3797
- if (copy_from_user(&val, optval, sizeof(val)))
3837
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
37983838 return -EFAULT;
37993839
38003840 lock_sock(sk);
....@@ -3813,7 +3853,7 @@
38133853
38143854 if (optlen < sizeof(val))
38153855 return -EINVAL;
3816
- if (copy_from_user(&val, optval, sizeof(val)))
3856
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
38173857 return -EFAULT;
38183858
38193859 lock_sock(sk);
....@@ -3827,7 +3867,7 @@
38273867
38283868 if (optlen < sizeof(val))
38293869 return -EINVAL;
3830
- if (copy_from_user(&val, optval, sizeof(val)))
3870
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
38313871 return -EFAULT;
38323872
38333873 lock_sock(sk);
....@@ -3843,7 +3883,7 @@
38433883 return -EINVAL;
38443884 if (optlen < sizeof(val))
38453885 return -EINVAL;
3846
- if (copy_from_user(&val, optval, sizeof(val)))
3886
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
38473887 return -EFAULT;
38483888
38493889 lock_sock(sk);
....@@ -3862,7 +3902,7 @@
38623902
38633903 if (optlen != sizeof(val))
38643904 return -EINVAL;
3865
- if (copy_from_user(&val, optval, sizeof(val)))
3905
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
38663906 return -EFAULT;
38673907
38683908 po->tp_tstamp = val;
....@@ -3870,14 +3910,14 @@
38703910 }
38713911 case PACKET_FANOUT:
38723912 {
3873
- int val;
3913
+ struct fanout_args args = { 0 };
38743914
3875
- if (optlen != sizeof(val))
3915
+ if (optlen != sizeof(int) && optlen != sizeof(args))
38763916 return -EINVAL;
3877
- if (copy_from_user(&val, optval, sizeof(val)))
3917
+ if (copy_from_sockptr(&args, optval, optlen))
38783918 return -EFAULT;
38793919
3880
- return fanout_add(sk, val & 0xffff, val >> 16);
3920
+ return fanout_add(sk, &args);
38813921 }
38823922 case PACKET_FANOUT_DATA:
38833923 {
....@@ -3887,13 +3927,27 @@
38873927
38883928 return fanout_set_data(po, optval, optlen);
38893929 }
3930
+ case PACKET_IGNORE_OUTGOING:
3931
+ {
3932
+ int val;
3933
+
3934
+ if (optlen != sizeof(val))
3935
+ return -EINVAL;
3936
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
3937
+ return -EFAULT;
3938
+ if (val < 0 || val > 1)
3939
+ return -EINVAL;
3940
+
3941
+ po->prot_hook.ignore_outgoing = !!val;
3942
+ return 0;
3943
+ }
38903944 case PACKET_TX_HAS_OFF:
38913945 {
38923946 unsigned int val;
38933947
38943948 if (optlen != sizeof(val))
38953949 return -EINVAL;
3896
- if (copy_from_user(&val, optval, sizeof(val)))
3950
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
38973951 return -EFAULT;
38983952
38993953 lock_sock(sk);
....@@ -3912,7 +3966,7 @@
39123966
39133967 if (optlen != sizeof(val))
39143968 return -EINVAL;
3915
- if (copy_from_user(&val, optval, sizeof(val)))
3969
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
39163970 return -EFAULT;
39173971
39183972 po->xmit = val ? packet_direct_xmit : dev_queue_xmit;
....@@ -3933,6 +3987,7 @@
39333987 void *data = &val;
39343988 union tpacket_stats_u st;
39353989 struct tpacket_rollover_stats rstats;
3990
+ int drops;
39363991
39373992 if (level != SOL_PACKET)
39383993 return -ENOPROTOOPT;
....@@ -3949,14 +4004,17 @@
39494004 memcpy(&st, &po->stats, sizeof(st));
39504005 memset(&po->stats, 0, sizeof(po->stats));
39514006 spin_unlock_bh(&sk->sk_receive_queue.lock);
4007
+ drops = atomic_xchg(&po->tp_drops, 0);
39524008
39534009 if (po->tp_version == TPACKET_V3) {
39544010 lv = sizeof(struct tpacket_stats_v3);
3955
- st.stats3.tp_packets += st.stats3.tp_drops;
4011
+ st.stats3.tp_drops = drops;
4012
+ st.stats3.tp_packets += drops;
39564013 data = &st.stats3;
39574014 } else {
39584015 lv = sizeof(struct tpacket_stats);
3959
- st.stats1.tp_packets += st.stats1.tp_drops;
4016
+ st.stats1.tp_drops = drops;
4017
+ st.stats1.tp_packets += drops;
39604018 data = &st.stats1;
39614019 }
39624020
....@@ -4010,6 +4068,9 @@
40104068 ((u32)po->fanout->flags << 24)) :
40114069 0);
40124070 break;
4071
+ case PACKET_IGNORE_OUTGOING:
4072
+ val = po->prot_hook.ignore_outgoing;
4073
+ break;
40134074 case PACKET_ROLLOVER_STATS:
40144075 if (!po->rollover)
40154076 return -EINVAL;
....@@ -4038,28 +4099,6 @@
40384099 return 0;
40394100 }
40404101
4041
-
4042
-#ifdef CONFIG_COMPAT
4043
-static int compat_packet_setsockopt(struct socket *sock, int level, int optname,
4044
- char __user *optval, unsigned int optlen)
4045
-{
4046
- struct packet_sock *po = pkt_sk(sock->sk);
4047
-
4048
- if (level != SOL_PACKET)
4049
- return -ENOPROTOOPT;
4050
-
4051
- if (optname == PACKET_FANOUT_DATA &&
4052
- po->fanout && po->fanout->type == PACKET_FANOUT_CBPF) {
4053
- optval = (char __user *)get_compat_bpf_fprog(optval);
4054
- if (!optval)
4055
- return -EFAULT;
4056
- optlen = sizeof(struct sock_fprog);
4057
- }
4058
-
4059
- return packet_setsockopt(sock, level, optname, optval, optlen);
4060
-}
4061
-#endif
4062
-
40634102 static int packet_notifier(struct notifier_block *this,
40644103 unsigned long msg, void *ptr)
40654104 {
....@@ -4075,7 +4114,7 @@
40754114 case NETDEV_UNREGISTER:
40764115 if (po->mclist)
40774116 packet_dev_mclist_delete(dev, &po->mclist);
4078
- /* fallthrough */
4117
+ fallthrough;
40794118
40804119 case NETDEV_DOWN:
40814120 if (dev->ifindex == po->ifindex) {
....@@ -4135,11 +4174,6 @@
41354174 spin_unlock_bh(&sk->sk_receive_queue.lock);
41364175 return put_user(amount, (int __user *)arg);
41374176 }
4138
- case SIOCGSTAMP:
4139
- return sock_get_timestamp(sk, (struct timeval __user *)arg);
4140
- case SIOCGSTAMPNS:
4141
- return sock_get_timestampns(sk, (struct timespec __user *)arg);
4142
-
41434177 #ifdef CONFIG_INET
41444178 case SIOCADDRT:
41454179 case SIOCDELRT:
....@@ -4177,8 +4211,7 @@
41774211 TP_STATUS_KERNEL))
41784212 mask |= EPOLLIN | EPOLLRDNORM;
41794213 }
4180
- if (po->pressure && __packet_rcv_has_room(po, NULL) == ROOM_NORMAL)
4181
- po->pressure = 0;
4214
+ packet_rcv_try_clear_pressure(po);
41824215 spin_unlock_bh(&sk->sk_receive_queue.lock);
41834216 spin_lock_bh(&sk->sk_write_queue.lock);
41844217 if (po->tx_ring.pg_vec) {
....@@ -4297,7 +4330,7 @@
42974330 struct packet_ring_buffer *rb;
42984331 struct sk_buff_head *rb_queue;
42994332 __be16 num;
4300
- int err = -EINVAL;
4333
+ int err;
43014334 /* Added to avoid minimal code churn */
43024335 struct tpacket_req *req = &req_u->req;
43034336
....@@ -4527,10 +4560,9 @@
45274560 .getname = packet_getname_spkt,
45284561 .poll = datagram_poll,
45294562 .ioctl = packet_ioctl,
4563
+ .gettstamp = sock_gettstamp,
45304564 .listen = sock_no_listen,
45314565 .shutdown = sock_no_shutdown,
4532
- .setsockopt = sock_no_setsockopt,
4533
- .getsockopt = sock_no_getsockopt,
45344566 .sendmsg = packet_sendmsg_spkt,
45354567 .recvmsg = packet_recvmsg,
45364568 .mmap = sock_no_mmap,
....@@ -4548,13 +4580,11 @@
45484580 .getname = packet_getname,
45494581 .poll = packet_poll,
45504582 .ioctl = packet_ioctl,
4583
+ .gettstamp = sock_gettstamp,
45514584 .listen = sock_no_listen,
45524585 .shutdown = sock_no_shutdown,
45534586 .setsockopt = packet_setsockopt,
45544587 .getsockopt = packet_getsockopt,
4555
-#ifdef CONFIG_COMPAT
4556
- .compat_setsockopt = compat_packet_setsockopt,
4557
-#endif
45584588 .sendmsg = packet_sendmsg,
45594589 .recvmsg = packet_recvmsg,
45604590 .mmap = packet_mmap,
....@@ -4631,9 +4661,11 @@
46314661 mutex_init(&net->packet.sklist_lock);
46324662 INIT_HLIST_HEAD(&net->packet.sklist);
46334663
4664
+#ifdef CONFIG_PROC_FS
46344665 if (!proc_create_net("packet", 0, net->proc_net, &packet_seq_ops,
46354666 sizeof(struct seq_net_private)))
46364667 return -ENOMEM;
4668
+#endif /* CONFIG_PROC_FS */
46374669
46384670 return 0;
46394671 }