hc
2023-12-11 6778948f9de86c3cfaf36725a7c87dcff9ba247f
kernel/net/packet/af_packet.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * INET An implementation of the TCP/IP protocol suite for the LINUX
34 * operating system. INET is implemented using the BSD Socket
....@@ -43,13 +44,6 @@
4344 * Chetan Loke : Implemented TPACKET_V3 block abstraction
4445 * layer.
4546 * Copyright (C) 2011, <lokec@ccs.neu.edu>
46
- *
47
- *
48
- * This program is free software; you can redistribute it and/or
49
- * modify it under the terms of the GNU General Public License
50
- * as published by the Free Software Foundation; either version
51
- * 2 of the License, or (at your option) any later version.
52
- *
5347 */
5448
5549 #include <linux/types.h>
....@@ -99,52 +93,56 @@
9993
10094 /*
10195 Assumptions:
102
- - if device has no dev->hard_header routine, it adds and removes ll header
103
- inside itself. In this case ll header is invisible outside of device,
104
- but higher levels still should reserve dev->hard_header_len.
105
- Some devices are enough clever to reallocate skb, when header
106
- will not fit to reserved space (tunnel), another ones are silly
107
- (PPP).
96
+ - If the device has no dev->header_ops->create, there is no LL header
97
+ visible above the device. In this case, its hard_header_len should be 0.
98
+ The device may prepend its own header internally. In this case, its
99
+ needed_headroom should be set to the space needed for it to add its
100
+ internal header.
101
+ For example, a WiFi driver pretending to be an Ethernet driver should
102
+ set its hard_header_len to be the Ethernet header length, and set its
103
+ needed_headroom to be (the real WiFi header length - the fake Ethernet
104
+ header length).
108105 - packet socket receives packets with pulled ll header,
109106 so that SOCK_RAW should push it back.
110107
111108 On receive:
112109 -----------
113110
114
-Incoming, dev->hard_header!=NULL
111
+Incoming, dev_has_header(dev) == true
115112 mac_header -> ll header
116113 data -> data
117114
118
-Outgoing, dev->hard_header!=NULL
115
+Outgoing, dev_has_header(dev) == true
119116 mac_header -> ll header
120117 data -> ll header
121118
122
-Incoming, dev->hard_header==NULL
123
- mac_header -> UNKNOWN position. It is very likely, that it points to ll
124
- header. PPP makes it, that is wrong, because introduce
125
- assymetry between rx and tx paths.
119
+Incoming, dev_has_header(dev) == false
120
+ mac_header -> data
121
+ However drivers often make it point to the ll header.
122
+ This is incorrect because the ll header should be invisible to us.
126123 data -> data
127124
128
-Outgoing, dev->hard_header==NULL
129
- mac_header -> data. ll header is still not built!
125
+Outgoing, dev_has_header(dev) == false
126
+ mac_header -> data. ll header is invisible to us.
130127 data -> data
131128
132129 Resume
133
- If dev->hard_header==NULL we are unlikely to restore sensible ll header.
130
+ If dev_has_header(dev) == false we are unable to restore the ll header,
131
+ because it is invisible to us.
134132
135133
136134 On transmit:
137135 ------------
138136
139
-dev->hard_header != NULL
137
+dev->header_ops != NULL
140138 mac_header -> ll header
141139 data -> ll header
142140
143
-dev->hard_header == NULL (ll header is added by device, we cannot control it)
141
+dev->header_ops == NULL (ll header is invisible to us)
144142 mac_header -> data
145143 data -> data
146144
147
- We should set nh.raw on output to correct posistion,
145
+ We should set network_header on output to the correct position,
148146 packet classifier depends on it.
149147 */
150148
....@@ -183,7 +181,6 @@
183181 #define BLOCK_LEN(x) ((x)->hdr.bh1.blk_len)
184182 #define BLOCK_SNUM(x) ((x)->hdr.bh1.seq_num)
185183 #define BLOCK_O2PRIV(x) ((x)->offset_to_priv)
186
-#define BLOCK_PRIV(x) ((void *)((char *)(x) + BLOCK_O2PRIV(x)))
187184
188185 struct packet_sock;
189186 static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
....@@ -275,24 +272,22 @@
275272 return po->xmit == packet_direct_xmit;
276273 }
277274
278
-static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb,
279
- struct net_device *sb_dev)
280
-{
281
- return dev_pick_tx_cpu_id(dev, skb, sb_dev, NULL);
282
-}
283
-
284275 static u16 packet_pick_tx_queue(struct sk_buff *skb)
285276 {
286277 struct net_device *dev = skb->dev;
287278 const struct net_device_ops *ops = dev->netdev_ops;
279
+ int cpu = raw_smp_processor_id();
288280 u16 queue_index;
289281
282
+#ifdef CONFIG_XPS
283
+ skb->sender_cpu = cpu + 1;
284
+#endif
285
+ skb_record_rx_queue(skb, cpu % dev->real_num_tx_queues);
290286 if (ops->ndo_select_queue) {
291
- queue_index = ops->ndo_select_queue(dev, skb, NULL,
292
- __packet_pick_tx_queue);
287
+ queue_index = ops->ndo_select_queue(dev, skb, NULL);
293288 queue_index = netdev_cap_txqueue(dev, queue_index);
294289 } else {
295
- queue_index = __packet_pick_tx_queue(dev, skb, NULL);
290
+ queue_index = netdev_pick_tx(dev, skb, NULL);
296291 }
297292
298293 return queue_index;
....@@ -392,7 +387,7 @@
392387 smp_wmb();
393388 }
394389
395
-static int __packet_get_status(struct packet_sock *po, void *frame)
390
+static int __packet_get_status(const struct packet_sock *po, void *frame)
396391 {
397392 union tpacket_uhdr h;
398393
....@@ -416,17 +411,18 @@
416411 }
417412 }
418413
419
-static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts,
414
+static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec64 *ts,
420415 unsigned int flags)
421416 {
422417 struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
423418
424419 if (shhwtstamps &&
425420 (flags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
426
- ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts))
421
+ ktime_to_timespec64_cond(shhwtstamps->hwtstamp, ts))
427422 return TP_STATUS_TS_RAW_HARDWARE;
428423
429
- if (ktime_to_timespec_cond(skb->tstamp, ts))
424
+ if ((flags & SOF_TIMESTAMPING_SOFTWARE) &&
425
+ ktime_to_timespec64_cond(skb->tstamp, ts))
430426 return TP_STATUS_TS_SOFTWARE;
431427
432428 return 0;
....@@ -436,13 +432,20 @@
436432 struct sk_buff *skb)
437433 {
438434 union tpacket_uhdr h;
439
- struct timespec ts;
435
+ struct timespec64 ts;
440436 __u32 ts_status;
441437
442438 if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
443439 return 0;
444440
445441 h.raw = frame;
442
+ /*
443
+ * versions 1 through 3 overflow the timestamps in y2106, since they
444
+ * all store the seconds in a 32-bit unsigned integer.
445
+ * If we create a version 4, that should have a 64-bit timestamp,
446
+ * either 64-bit seconds + 32-bit nanoseconds, or just 64-bit
447
+ * nanoseconds.
448
+ */
446449 switch (po->tp_version) {
447450 case TPACKET_V1:
448451 h.h1->tp_sec = ts.tv_sec;
....@@ -468,10 +471,10 @@
468471 return ts_status;
469472 }
470473
471
-static void *packet_lookup_frame(struct packet_sock *po,
472
- struct packet_ring_buffer *rb,
473
- unsigned int position,
474
- int status)
474
+static void *packet_lookup_frame(const struct packet_sock *po,
475
+ const struct packet_ring_buffer *rb,
476
+ unsigned int position,
477
+ int status)
475478 {
476479 unsigned int pg_vec_pos, frame_offset;
477480 union tpacket_uhdr h;
....@@ -528,7 +531,7 @@
528531 int blk_size_in_bytes)
529532 {
530533 struct net_device *dev;
531
- unsigned int mbits = 0, msec = 0, div = 0, tmo = 0;
534
+ unsigned int mbits, div;
532535 struct ethtool_link_ksettings ecmd;
533536 int err;
534537
....@@ -540,31 +543,25 @@
540543 }
541544 err = __ethtool_get_link_ksettings(dev, &ecmd);
542545 rtnl_unlock();
543
- if (!err) {
544
- /*
545
- * If the link speed is so slow you don't really
546
- * need to worry about perf anyways
547
- */
548
- if (ecmd.base.speed < SPEED_1000 ||
549
- ecmd.base.speed == SPEED_UNKNOWN) {
550
- return DEFAULT_PRB_RETIRE_TOV;
551
- } else {
552
- msec = 1;
553
- div = ecmd.base.speed / 1000;
554
- }
555
- } else
546
+ if (err)
556547 return DEFAULT_PRB_RETIRE_TOV;
557548
549
+ /* If the link speed is so slow you don't really
550
+ * need to worry about perf anyways
551
+ */
552
+ if (ecmd.base.speed < SPEED_1000 ||
553
+ ecmd.base.speed == SPEED_UNKNOWN)
554
+ return DEFAULT_PRB_RETIRE_TOV;
555
+
556
+ div = ecmd.base.speed / 1000;
558557 mbits = (blk_size_in_bytes * 8) / (1024 * 1024);
559558
560559 if (div)
561560 mbits /= div;
562561
563
- tmo = mbits * msec;
564
-
565562 if (div)
566
- return tmo+1;
567
- return tmo;
563
+ return mbits + 1;
564
+ return mbits;
568565 }
569566
570567 static void prb_init_ft_ops(struct tpacket_kbdq_core *p1,
....@@ -600,6 +597,7 @@
600597 req_u->req3.tp_block_size);
601598 p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov);
602599 p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv;
600
+ rwlock_init(&p1->blk_fill_in_prog_lock);
603601
604602 p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv);
605603 prb_init_ft_ops(p1, req_u);
....@@ -666,10 +664,9 @@
666664 *
667665 */
668666 if (BLOCK_NUM_PKTS(pbd)) {
669
- while (atomic_read(&pkc->blk_fill_in_prog)) {
670
- /* Waiting for skb_copy_bits to finish... */
671
- cpu_relax();
672
- }
667
+ /* Waiting for skb_copy_bits to finish... */
668
+ write_lock(&pkc->blk_fill_in_prog_lock);
669
+ write_unlock(&pkc->blk_fill_in_prog_lock);
673670 }
674671
675672 if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) {
....@@ -767,7 +764,7 @@
767764 struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
768765 struct sock *sk = &po->sk;
769766
770
- if (po->stats.stats3.tp_drops)
767
+ if (atomic_read(&po->tp_drops))
771768 status |= TP_STATUS_LOSING;
772769
773770 last_pkt = (struct tpacket3_hdr *)pkc1->prev;
....@@ -783,8 +780,8 @@
783780 * It shouldn't really happen as we don't close empty
784781 * blocks. See prb_retire_rx_blk_timer_expired().
785782 */
786
- struct timespec ts;
787
- getnstimeofday(&ts);
783
+ struct timespec64 ts;
784
+ ktime_get_real_ts64(&ts);
788785 h1->ts_last_pkt.ts_sec = ts.tv_sec;
789786 h1->ts_last_pkt.ts_nsec = ts.tv_nsec;
790787 }
....@@ -814,7 +811,7 @@
814811 static void prb_open_block(struct tpacket_kbdq_core *pkc1,
815812 struct tpacket_block_desc *pbd1)
816813 {
817
- struct timespec ts;
814
+ struct timespec64 ts;
818815 struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
819816
820817 smp_rmb();
....@@ -827,7 +824,7 @@
827824 BLOCK_NUM_PKTS(pbd1) = 0;
828825 BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
829826
830
- getnstimeofday(&ts);
827
+ ktime_get_real_ts64(&ts);
831828
832829 h1->ts_first_pkt.ts_sec = ts.tv_sec;
833830 h1->ts_first_pkt.ts_nsec = ts.tv_nsec;
....@@ -928,10 +925,9 @@
928925 * the timer-handler already handled this case.
929926 */
930927 if (!(status & TP_STATUS_BLK_TMO)) {
931
- while (atomic_read(&pkc->blk_fill_in_prog)) {
932
- /* Waiting for skb_copy_bits to finish... */
933
- cpu_relax();
934
- }
928
+ /* Waiting for skb_copy_bits to finish... */
929
+ write_lock(&pkc->blk_fill_in_prog_lock);
930
+ write_unlock(&pkc->blk_fill_in_prog_lock);
935931 }
936932 prb_close_block(pkc, pbd, po, status);
937933 return;
....@@ -952,7 +948,8 @@
952948 __releases(&pkc->blk_fill_in_prog_lock)
953949 {
954950 struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
955
- atomic_dec(&pkc->blk_fill_in_prog);
951
+
952
+ read_unlock(&pkc->blk_fill_in_prog_lock);
956953 }
957954
958955 static void prb_fill_rxhash(struct tpacket_kbdq_core *pkc,
....@@ -1007,14 +1004,13 @@
10071004 pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len);
10081005 BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len);
10091006 BLOCK_NUM_PKTS(pbd) += 1;
1010
- atomic_inc(&pkc->blk_fill_in_prog);
1007
+ read_lock(&pkc->blk_fill_in_prog_lock);
10111008 prb_run_all_ft_ops(pkc, ppd);
10121009 }
10131010
10141011 /* Assumes caller has the sk->rx_queue.lock */
10151012 static void *__packet_lookup_frame_in_block(struct packet_sock *po,
10161013 struct sk_buff *skb,
1017
- int status,
10181014 unsigned int len
10191015 )
10201016 {
....@@ -1086,7 +1082,7 @@
10861082 po->rx_ring.head, status);
10871083 return curr;
10881084 case TPACKET_V3:
1089
- return __packet_lookup_frame_in_block(po, skb, status, len);
1085
+ return __packet_lookup_frame_in_block(po, skb, len);
10901086 default:
10911087 WARN(1, "TPACKET version not supported\n");
10921088 BUG();
....@@ -1094,10 +1090,10 @@
10941090 }
10951091 }
10961092
1097
-static void *prb_lookup_block(struct packet_sock *po,
1098
- struct packet_ring_buffer *rb,
1099
- unsigned int idx,
1100
- int status)
1093
+static void *prb_lookup_block(const struct packet_sock *po,
1094
+ const struct packet_ring_buffer *rb,
1095
+ unsigned int idx,
1096
+ int status)
11011097 {
11021098 struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
11031099 struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, idx);
....@@ -1210,12 +1206,12 @@
12101206 #define ROOM_LOW 0x1
12111207 #define ROOM_NORMAL 0x2
12121208
1213
-static bool __tpacket_has_room(struct packet_sock *po, int pow_off)
1209
+static bool __tpacket_has_room(const struct packet_sock *po, int pow_off)
12141210 {
12151211 int idx, len;
12161212
1217
- len = po->rx_ring.frame_max + 1;
1218
- idx = po->rx_ring.head;
1213
+ len = READ_ONCE(po->rx_ring.frame_max) + 1;
1214
+ idx = READ_ONCE(po->rx_ring.head);
12191215 if (pow_off)
12201216 idx += len >> pow_off;
12211217 if (idx >= len)
....@@ -1223,12 +1219,12 @@
12231219 return packet_lookup_frame(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
12241220 }
12251221
1226
-static bool __tpacket_v3_has_room(struct packet_sock *po, int pow_off)
1222
+static bool __tpacket_v3_has_room(const struct packet_sock *po, int pow_off)
12271223 {
12281224 int idx, len;
12291225
1230
- len = po->rx_ring.prb_bdqc.knum_blocks;
1231
- idx = po->rx_ring.prb_bdqc.kactive_blk_num;
1226
+ len = READ_ONCE(po->rx_ring.prb_bdqc.knum_blocks);
1227
+ idx = READ_ONCE(po->rx_ring.prb_bdqc.kactive_blk_num);
12321228 if (pow_off)
12331229 idx += len >> pow_off;
12341230 if (idx >= len)
....@@ -1236,15 +1232,18 @@
12361232 return prb_lookup_block(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
12371233 }
12381234
1239
-static int __packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
1235
+static int __packet_rcv_has_room(const struct packet_sock *po,
1236
+ const struct sk_buff *skb)
12401237 {
1241
- struct sock *sk = &po->sk;
1238
+ const struct sock *sk = &po->sk;
12421239 int ret = ROOM_NONE;
12431240
12441241 if (po->prot_hook.func != tpacket_rcv) {
1245
- int avail = sk->sk_rcvbuf - atomic_read(&sk->sk_rmem_alloc)
1246
- - (skb ? skb->truesize : 0);
1247
- if (avail > (sk->sk_rcvbuf >> ROOM_POW_OFF))
1242
+ int rcvbuf = READ_ONCE(sk->sk_rcvbuf);
1243
+ int avail = rcvbuf - atomic_read(&sk->sk_rmem_alloc)
1244
+ - (skb ? skb->truesize : 0);
1245
+
1246
+ if (avail > (rcvbuf >> ROOM_POW_OFF))
12481247 return ROOM_NORMAL;
12491248 else if (avail > 0)
12501249 return ROOM_LOW;
....@@ -1269,17 +1268,22 @@
12691268
12701269 static int packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
12711270 {
1272
- int ret;
1273
- bool has_room;
1271
+ int pressure, ret;
12741272
1275
- spin_lock_bh(&po->sk.sk_receive_queue.lock);
12761273 ret = __packet_rcv_has_room(po, skb);
1277
- has_room = ret == ROOM_NORMAL;
1278
- if (po->pressure == has_room)
1279
- po->pressure = !has_room;
1280
- spin_unlock_bh(&po->sk.sk_receive_queue.lock);
1274
+ pressure = ret != ROOM_NORMAL;
1275
+
1276
+ if (READ_ONCE(po->pressure) != pressure)
1277
+ WRITE_ONCE(po->pressure, pressure);
12811278
12821279 return ret;
1280
+}
1281
+
1282
+static void packet_rcv_try_clear_pressure(struct packet_sock *po)
1283
+{
1284
+ if (READ_ONCE(po->pressure) &&
1285
+ __packet_rcv_has_room(po, NULL) == ROOM_NORMAL)
1286
+ WRITE_ONCE(po->pressure, 0);
12831287 }
12841288
12851289 static void packet_sock_destruct(struct sock *sk)
....@@ -1355,7 +1359,7 @@
13551359 struct packet_sock *po, *po_next, *po_skip = NULL;
13561360 unsigned int i, j, room = ROOM_NONE;
13571361
1358
- po = pkt_sk(f->arr[idx]);
1362
+ po = pkt_sk(rcu_dereference(f->arr[idx]));
13591363
13601364 if (try_self) {
13611365 room = packet_rcv_has_room(po, skb);
....@@ -1367,8 +1371,8 @@
13671371
13681372 i = j = min_t(int, po->rollover->sock, num - 1);
13691373 do {
1370
- po_next = pkt_sk(f->arr[i]);
1371
- if (po_next != po_skip && !po_next->pressure &&
1374
+ po_next = pkt_sk(rcu_dereference(f->arr[i]));
1375
+ if (po_next != po_skip && !READ_ONCE(po_next->pressure) &&
13721376 packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) {
13731377 if (i != j)
13741378 po->rollover->sock = i;
....@@ -1462,7 +1466,7 @@
14621466 if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER))
14631467 idx = fanout_demux_rollover(f, skb, idx, true, num);
14641468
1465
- po = pkt_sk(f->arr[idx]);
1469
+ po = pkt_sk(rcu_dereference(f->arr[idx]));
14661470 return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev);
14671471 }
14681472
....@@ -1476,7 +1480,7 @@
14761480 struct packet_fanout *f = po->fanout;
14771481
14781482 spin_lock(&f->lock);
1479
- f->arr[f->num_members] = sk;
1483
+ rcu_assign_pointer(f->arr[f->num_members], sk);
14801484 smp_wmb();
14811485 f->num_members++;
14821486 if (f->num_members == 1)
....@@ -1491,11 +1495,14 @@
14911495
14921496 spin_lock(&f->lock);
14931497 for (i = 0; i < f->num_members; i++) {
1494
- if (f->arr[i] == sk)
1498
+ if (rcu_dereference_protected(f->arr[i],
1499
+ lockdep_is_held(&f->lock)) == sk)
14951500 break;
14961501 }
14971502 BUG_ON(i >= f->num_members);
1498
- f->arr[i] = f->arr[f->num_members - 1];
1503
+ rcu_assign_pointer(f->arr[i],
1504
+ rcu_dereference_protected(f->arr[f->num_members - 1],
1505
+ lockdep_is_held(&f->lock)));
14991506 f->num_members--;
15001507 if (f->num_members == 0)
15011508 __dev_remove_pack(&f->prot_hook);
....@@ -1538,7 +1545,7 @@
15381545 }
15391546 }
15401547
1541
-static int fanout_set_data_cbpf(struct packet_sock *po, char __user *data,
1548
+static int fanout_set_data_cbpf(struct packet_sock *po, sockptr_t data,
15421549 unsigned int len)
15431550 {
15441551 struct bpf_prog *new;
....@@ -1547,10 +1554,10 @@
15471554
15481555 if (sock_flag(&po->sk, SOCK_FILTER_LOCKED))
15491556 return -EPERM;
1550
- if (len != sizeof(fprog))
1551
- return -EINVAL;
1552
- if (copy_from_user(&fprog, data, len))
1553
- return -EFAULT;
1557
+
1558
+ ret = copy_bpf_fprog_from_user(&fprog, data, len);
1559
+ if (ret)
1560
+ return ret;
15541561
15551562 ret = bpf_prog_create_from_user(&new, &fprog, NULL, false);
15561563 if (ret)
....@@ -1560,7 +1567,7 @@
15601567 return 0;
15611568 }
15621569
1563
-static int fanout_set_data_ebpf(struct packet_sock *po, char __user *data,
1570
+static int fanout_set_data_ebpf(struct packet_sock *po, sockptr_t data,
15641571 unsigned int len)
15651572 {
15661573 struct bpf_prog *new;
....@@ -1570,7 +1577,7 @@
15701577 return -EPERM;
15711578 if (len != sizeof(fd))
15721579 return -EINVAL;
1573
- if (copy_from_user(&fd, data, len))
1580
+ if (copy_from_sockptr(&fd, data, len))
15741581 return -EFAULT;
15751582
15761583 new = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
....@@ -1581,7 +1588,7 @@
15811588 return 0;
15821589 }
15831590
1584
-static int fanout_set_data(struct packet_sock *po, char __user *data,
1591
+static int fanout_set_data(struct packet_sock *po, sockptr_t data,
15851592 unsigned int len)
15861593 {
15871594 switch (po->fanout->type) {
....@@ -1633,13 +1640,15 @@
16331640 return false;
16341641 }
16351642
1636
-static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
1643
+static int fanout_add(struct sock *sk, struct fanout_args *args)
16371644 {
16381645 struct packet_rollover *rollover = NULL;
16391646 struct packet_sock *po = pkt_sk(sk);
1647
+ u16 type_flags = args->type_flags;
16401648 struct packet_fanout *f, *match;
16411649 u8 type = type_flags & 0xff;
16421650 u8 flags = type_flags >> 8;
1651
+ u16 id = args->id;
16431652 int err;
16441653
16451654 switch (type) {
....@@ -1697,11 +1706,21 @@
16971706 }
16981707 }
16991708 err = -EINVAL;
1700
- if (match && match->flags != flags)
1701
- goto out;
1702
- if (!match) {
1709
+ if (match) {
1710
+ if (match->flags != flags)
1711
+ goto out;
1712
+ if (args->max_num_members &&
1713
+ args->max_num_members != match->max_num_members)
1714
+ goto out;
1715
+ } else {
1716
+ if (args->max_num_members > PACKET_FANOUT_MAX)
1717
+ goto out;
1718
+ if (!args->max_num_members)
1719
+ /* legacy PACKET_FANOUT_MAX */
1720
+ args->max_num_members = 256;
17031721 err = -ENOMEM;
1704
- match = kzalloc(sizeof(*match), GFP_KERNEL);
1722
+ match = kvzalloc(struct_size(match, arr, args->max_num_members),
1723
+ GFP_KERNEL);
17051724 if (!match)
17061725 goto out;
17071726 write_pnet(&match->net, sock_net(sk));
....@@ -1718,6 +1737,7 @@
17181737 match->prot_hook.af_packet_priv = match;
17191738 match->prot_hook.af_packet_net = read_pnet(&match->net);
17201739 match->prot_hook.id_match = match_fanout_group;
1740
+ match->max_num_members = args->max_num_members;
17211741 list_add(&match->list, &fanout_list);
17221742 }
17231743 err = -EINVAL;
....@@ -1728,7 +1748,7 @@
17281748 match->prot_hook.type == po->prot_hook.type &&
17291749 match->prot_hook.dev == po->prot_hook.dev) {
17301750 err = -ENOSPC;
1731
- if (refcount_read(&match->sk_ref) < PACKET_FANOUT_MAX) {
1751
+ if (refcount_read(&match->sk_ref) < match->max_num_members) {
17321752 __dev_remove_pack(&po->prot_hook);
17331753
17341754 /* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */
....@@ -1745,7 +1765,7 @@
17451765
17461766 if (err && !refcount_read(&match->sk_ref)) {
17471767 list_del(&match->list);
1748
- kfree(match);
1768
+ kvfree(match);
17491769 }
17501770
17511771 out:
....@@ -1835,7 +1855,7 @@
18351855 skb_dst_drop(skb);
18361856
18371857 /* drop conntrack reference */
1838
- nf_reset(skb);
1858
+ nf_reset_ct(skb);
18391859
18401860 spkt = &PACKET_SKB_CB(skb)->sa.pkt;
18411861
....@@ -1863,6 +1883,16 @@
18631883 return 0;
18641884 }
18651885
1886
+static void packet_parse_headers(struct sk_buff *skb, struct socket *sock)
1887
+{
1888
+ if ((!skb->protocol || skb->protocol == htons(ETH_P_ALL)) &&
1889
+ sock->type == SOCK_RAW) {
1890
+ skb_reset_mac_header(skb);
1891
+ skb->protocol = dev_parse_header_protocol(skb);
1892
+ }
1893
+
1894
+ skb_probe_transport_header(skb);
1895
+}
18661896
18671897 /*
18681898 * Output a raw packet to a device layer. This bypasses all the other
....@@ -1978,12 +2008,12 @@
19782008 skb->mark = sk->sk_mark;
19792009 skb->tstamp = sockc.transmit_time;
19802010
1981
- sock_tx_timestamp(sk, sockc.tsflags, &skb_shinfo(skb)->tx_flags);
2011
+ skb_setup_tx_timestamp(skb, sockc.tsflags);
19822012
19832013 if (unlikely(extra_len == 4))
19842014 skb->no_fcs = 1;
19852015
1986
- skb_probe_transport_header(skb, 0);
2016
+ packet_parse_headers(skb, sock);
19872017
19882018 dev_queue_xmit(skb);
19892019 rcu_read_unlock();
....@@ -2060,7 +2090,7 @@
20602090
20612091 skb->dev = dev;
20622092
2063
- if (dev->header_ops) {
2093
+ if (dev_has_header(dev)) {
20642094 /* The device has an explicit notion of ll header,
20652095 * exported to higher levels.
20662096 *
....@@ -2125,7 +2155,7 @@
21252155 skb_dst_drop(skb);
21262156
21272157 /* drop conntrack reference */
2128
- nf_reset(skb);
2158
+ nf_reset_ct(skb);
21292159
21302160 spin_lock(&sk->sk_receive_queue.lock);
21312161 po->stats.stats1.tp_packets++;
....@@ -2137,10 +2167,8 @@
21372167
21382168 drop_n_acct:
21392169 is_drop_n_account = true;
2140
- spin_lock(&sk->sk_receive_queue.lock);
2141
- po->stats.stats1.tp_drops++;
2170
+ atomic_inc(&po->tp_drops);
21422171 atomic_inc(&sk->sk_drops);
2143
- spin_unlock(&sk->sk_receive_queue.lock);
21442172
21452173 drop_n_restore:
21462174 if (skb_head != skb->data && skb_shared(skb)) {
....@@ -2169,7 +2197,7 @@
21692197 unsigned short macoff, hdrlen;
21702198 unsigned int netoff;
21712199 struct sk_buff *copy_skb = NULL;
2172
- struct timespec ts;
2200
+ struct timespec64 ts;
21732201 __u32 ts_status;
21742202 bool is_drop_n_account = false;
21752203 unsigned int slot_id = 0;
....@@ -2191,7 +2219,7 @@
21912219 if (!net_eq(dev_net(dev), sock_net(sk)))
21922220 goto drop;
21932221
2194
- if (dev->header_ops) {
2222
+ if (dev_has_header(dev)) {
21952223 if (sk->sk_type != SOCK_DGRAM)
21962224 skb_push(skb, skb->data - skb_mac_header(skb));
21972225 else if (skb->pkt_type == PACKET_OUTGOING) {
....@@ -2206,11 +2234,16 @@
22062234 if (!res)
22072235 goto drop_n_restore;
22082236
2237
+ /* If we are flooded, just give up */
2238
+ if (__packet_rcv_has_room(po, skb) == ROOM_NONE) {
2239
+ atomic_inc(&po->tp_drops);
2240
+ goto drop_n_restore;
2241
+ }
2242
+
22092243 if (skb->ip_summed == CHECKSUM_PARTIAL)
22102244 status |= TP_STATUS_CSUMNOTREADY;
22112245 else if (skb->pkt_type != PACKET_OUTGOING &&
2212
- (skb->ip_summed == CHECKSUM_COMPLETE ||
2213
- skb_csum_unnecessary(skb)))
2246
+ skb_csum_unnecessary(skb))
22142247 status |= TP_STATUS_CSUM_VALID;
22152248
22162249 if (snaplen > res)
....@@ -2231,9 +2264,7 @@
22312264 macoff = netoff - maclen;
22322265 }
22332266 if (netoff > USHRT_MAX) {
2234
- spin_lock(&sk->sk_receive_queue.lock);
2235
- po->stats.stats1.tp_drops++;
2236
- spin_unlock(&sk->sk_receive_queue.lock);
2267
+ atomic_inc(&po->tp_drops);
22372268 goto drop_n_restore;
22382269 }
22392270 if (po->tp_version <= TPACKET_V2) {
....@@ -2246,8 +2277,11 @@
22462277 copy_skb = skb_get(skb);
22472278 skb_head = skb->data;
22482279 }
2249
- if (copy_skb)
2280
+ if (copy_skb) {
2281
+ memset(&PACKET_SKB_CB(copy_skb)->sa.ll, 0,
2282
+ sizeof(PACKET_SKB_CB(copy_skb)->sa.ll));
22502283 skb_set_owner_r(copy_skb, sk);
2284
+ }
22512285 }
22522286 snaplen = po->rx_ring.frame_size - macoff;
22532287 if ((int)snaplen < 0) {
....@@ -2299,7 +2333,7 @@
22992333 * Anyways, moving it for V1/V2 only as V3 doesn't need this
23002334 * at packet level.
23012335 */
2302
- if (po->stats.stats1.tp_drops)
2336
+ if (atomic_read(&po->tp_drops))
23032337 status |= TP_STATUS_LOSING;
23042338 }
23052339
....@@ -2312,8 +2346,13 @@
23122346
23132347 skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
23142348
2315
- if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
2316
- getnstimeofday(&ts);
2349
+ /* Always timestamp; prefer an existing software timestamp taken
2350
+ * closer to the time of capture.
2351
+ */
2352
+ ts_status = tpacket_get_timestamp(skb, &ts,
2353
+ po->tp_tstamp | SOF_TIMESTAMPING_SOFTWARE);
2354
+ if (!ts_status)
2355
+ ktime_get_real_ts64(&ts);
23172356
23182357 status |= ts_status;
23192358
....@@ -2412,9 +2451,9 @@
24122451 return 0;
24132452
24142453 drop_n_account:
2415
- is_drop_n_account = true;
2416
- po->stats.stats1.tp_drops++;
24172454 spin_unlock(&sk->sk_receive_queue.lock);
2455
+ atomic_inc(&po->tp_drops);
2456
+ is_drop_n_account = true;
24182457
24192458 sk->sk_data_ready(sk);
24202459 kfree_skb(copy_skb);
....@@ -2440,15 +2479,6 @@
24402479 }
24412480
24422481 sock_wfree(skb);
2443
-}
2444
-
2445
-static void tpacket_set_protocol(const struct net_device *dev,
2446
- struct sk_buff *skb)
2447
-{
2448
- if (dev->type == ARPHRD_ETHER) {
2449
- skb_reset_mac_header(skb);
2450
- skb->protocol = eth_hdr(skb)->h_proto;
2451
- }
24522482 }
24532483
24542484 static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len)
....@@ -2498,7 +2528,7 @@
24982528 skb->priority = po->sk.sk_priority;
24992529 skb->mark = po->sk.sk_mark;
25002530 skb->tstamp = sockc->transmit_time;
2501
- sock_tx_timestamp(&po->sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags);
2531
+ skb_setup_tx_timestamp(skb, sockc->tsflags);
25022532 skb_zcopy_set_nouarg(skb, ph.raw);
25032533
25042534 skb_reserve(skb, hlen);
....@@ -2521,8 +2551,6 @@
25212551 return err;
25222552 if (!dev_validate_header(dev, skb->data, hdrlen))
25232553 return -EINVAL;
2524
- if (!skb->protocol)
2525
- tpacket_set_protocol(dev, skb);
25262554
25272555 data += hdrlen;
25282556 to_write -= hdrlen;
....@@ -2557,7 +2585,7 @@
25572585 len = ((to_write > len_max) ? len_max : to_write);
25582586 }
25592587
2560
- skb_probe_transport_header(skb, 0);
2588
+ packet_parse_headers(skb, sock);
25612589
25622590 return tp_len;
25632591 }
....@@ -2788,8 +2816,9 @@
27882816
27892817 status = TP_STATUS_SEND_REQUEST;
27902818 err = po->xmit(skb);
2791
- if (unlikely(err > 0)) {
2792
- err = net_xmit_errno(err);
2819
+ if (unlikely(err != 0)) {
2820
+ if (err > 0)
2821
+ err = net_xmit_errno(err);
27932822 if (err && __packet_get_status(po, ph) ==
27942823 TP_STATUS_AVAILABLE) {
27952824 /* skb was destructed already */
....@@ -2956,13 +2985,13 @@
29562985 if (err)
29572986 goto out_free;
29582987
2959
- if (sock->type == SOCK_RAW &&
2960
- !dev_validate_header(dev, skb->data, len)) {
2988
+ if ((sock->type == SOCK_RAW &&
2989
+ !dev_validate_header(dev, skb->data, len)) || !skb->len) {
29612990 err = -EINVAL;
29622991 goto out_free;
29632992 }
29642993
2965
- sock_tx_timestamp(sk, sockc.tsflags, &skb_shinfo(skb)->tx_flags);
2994
+ skb_setup_tx_timestamp(skb, sockc.tsflags);
29662995
29672996 if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) &&
29682997 !packet_extra_vlan_len_allowed(dev, skb)) {
....@@ -2984,14 +3013,18 @@
29843013 virtio_net_hdr_set_proto(skb, &vnet_hdr);
29853014 }
29863015
2987
- skb_probe_transport_header(skb, reserve);
3016
+ packet_parse_headers(skb, sock);
29883017
29893018 if (unlikely(extra_len == 4))
29903019 skb->no_fcs = 1;
29913020
29923021 err = po->xmit(skb);
2993
- if (err > 0 && (err = net_xmit_errno(err)) != 0)
2994
- goto out_unlock;
3022
+ if (unlikely(err != 0)) {
3023
+ if (err > 0)
3024
+ err = net_xmit_errno(err);
3025
+ if (err)
3026
+ goto out_unlock;
3027
+ }
29953028
29963029 dev_put(dev);
29973030
....@@ -3011,10 +3044,13 @@
30113044 struct sock *sk = sock->sk;
30123045 struct packet_sock *po = pkt_sk(sk);
30133046
3014
- if (po->tx_ring.pg_vec)
3047
+ /* Reading tx_ring.pg_vec without holding pg_vec_lock is racy.
3048
+ * tpacket_snd() will redo the check safely.
3049
+ */
3050
+ if (data_race(po->tx_ring.pg_vec))
30153051 return tpacket_snd(po, msg);
3016
- else
3017
- return packet_snd(sock, msg, len);
3052
+
3053
+ return packet_snd(sock, msg, len);
30183054 }
30193055
30203056 /*
....@@ -3075,7 +3111,7 @@
30753111 kfree(po->rollover);
30763112 if (f) {
30773113 fanout_release_data(f);
3078
- kfree(f);
3114
+ kvfree(f);
30793115 }
30803116 /*
30813117 * Now the socket is dead. No more input will appear.
....@@ -3370,8 +3406,7 @@
33703406 if (skb == NULL)
33713407 goto out;
33723408
3373
- if (pkt_sk(sk)->pressure)
3374
- packet_rcv_has_room(pkt_sk(sk), NULL);
3409
+ packet_rcv_try_clear_pressure(pkt_sk(sk));
33753410
33763411 if (pkt_sk(sk)->has_vnet_hdr) {
33773412 err = packet_rcv_vnet(msg, skb, &len);
....@@ -3406,6 +3441,8 @@
34063441 sock_recv_ts_and_drops(msg, sk, skb);
34073442
34083443 if (msg->msg_name) {
3444
+ const size_t max_len = min(sizeof(skb->cb),
3445
+ sizeof(struct sockaddr_storage));
34093446 int copy_len;
34103447
34113448 /* If the address length field is there to be filled
....@@ -3428,6 +3465,10 @@
34283465 msg->msg_namelen = sizeof(struct sockaddr_ll);
34293466 }
34303467 }
3468
+ if (WARN_ON_ONCE(copy_len > max_len)) {
3469
+ copy_len = max_len;
3470
+ msg->msg_namelen = copy_len;
3471
+ }
34313472 memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len);
34323473 }
34333474
....@@ -3438,8 +3479,7 @@
34383479 if (skb->ip_summed == CHECKSUM_PARTIAL)
34393480 aux.tp_status |= TP_STATUS_CSUMNOTREADY;
34403481 else if (skb->pkt_type != PACKET_OUTGOING &&
3441
- (skb->ip_summed == CHECKSUM_COMPLETE ||
3442
- skb_csum_unnecessary(skb)))
3482
+ skb_csum_unnecessary(skb))
34433483 aux.tp_status |= TP_STATUS_CSUM_VALID;
34443484
34453485 aux.tp_len = origlen;
....@@ -3669,7 +3709,8 @@
36693709 }
36703710
36713711 static int
3672
-packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
3712
+packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
3713
+ unsigned int optlen)
36733714 {
36743715 struct sock *sk = sock->sk;
36753716 struct packet_sock *po = pkt_sk(sk);
....@@ -3689,7 +3730,7 @@
36893730 return -EINVAL;
36903731 if (len > sizeof(mreq))
36913732 len = sizeof(mreq);
3692
- if (copy_from_user(&mreq, optval, len))
3733
+ if (copy_from_sockptr(&mreq, optval, len))
36933734 return -EFAULT;
36943735 if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
36953736 return -EINVAL;
....@@ -3720,7 +3761,7 @@
37203761 if (optlen < len) {
37213762 ret = -EINVAL;
37223763 } else {
3723
- if (copy_from_user(&req_u.req, optval, len))
3764
+ if (copy_from_sockptr(&req_u.req, optval, len))
37243765 ret = -EFAULT;
37253766 else
37263767 ret = packet_set_ring(sk, &req_u, 0,
....@@ -3735,7 +3776,7 @@
37353776
37363777 if (optlen != sizeof(val))
37373778 return -EINVAL;
3738
- if (copy_from_user(&val, optval, sizeof(val)))
3779
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
37393780 return -EFAULT;
37403781
37413782 pkt_sk(sk)->copy_thresh = val;
....@@ -3747,7 +3788,7 @@
37473788
37483789 if (optlen != sizeof(val))
37493790 return -EINVAL;
3750
- if (copy_from_user(&val, optval, sizeof(val)))
3791
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
37513792 return -EFAULT;
37523793 switch (val) {
37533794 case TPACKET_V1:
....@@ -3773,7 +3814,7 @@
37733814
37743815 if (optlen != sizeof(val))
37753816 return -EINVAL;
3776
- if (copy_from_user(&val, optval, sizeof(val)))
3817
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
37773818 return -EFAULT;
37783819 if (val > INT_MAX)
37793820 return -EINVAL;
....@@ -3793,7 +3834,7 @@
37933834
37943835 if (optlen != sizeof(val))
37953836 return -EINVAL;
3796
- if (copy_from_user(&val, optval, sizeof(val)))
3837
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
37973838 return -EFAULT;
37983839
37993840 lock_sock(sk);
....@@ -3812,7 +3853,7 @@
38123853
38133854 if (optlen < sizeof(val))
38143855 return -EINVAL;
3815
- if (copy_from_user(&val, optval, sizeof(val)))
3856
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
38163857 return -EFAULT;
38173858
38183859 lock_sock(sk);
....@@ -3826,7 +3867,7 @@
38263867
38273868 if (optlen < sizeof(val))
38283869 return -EINVAL;
3829
- if (copy_from_user(&val, optval, sizeof(val)))
3870
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
38303871 return -EFAULT;
38313872
38323873 lock_sock(sk);
....@@ -3842,7 +3883,7 @@
38423883 return -EINVAL;
38433884 if (optlen < sizeof(val))
38443885 return -EINVAL;
3845
- if (copy_from_user(&val, optval, sizeof(val)))
3886
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
38463887 return -EFAULT;
38473888
38483889 lock_sock(sk);
....@@ -3861,7 +3902,7 @@
38613902
38623903 if (optlen != sizeof(val))
38633904 return -EINVAL;
3864
- if (copy_from_user(&val, optval, sizeof(val)))
3905
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
38653906 return -EFAULT;
38663907
38673908 po->tp_tstamp = val;
....@@ -3869,14 +3910,14 @@
38693910 }
38703911 case PACKET_FANOUT:
38713912 {
3872
- int val;
3913
+ struct fanout_args args = { 0 };
38733914
3874
- if (optlen != sizeof(val))
3915
+ if (optlen != sizeof(int) && optlen != sizeof(args))
38753916 return -EINVAL;
3876
- if (copy_from_user(&val, optval, sizeof(val)))
3917
+ if (copy_from_sockptr(&args, optval, optlen))
38773918 return -EFAULT;
38783919
3879
- return fanout_add(sk, val & 0xffff, val >> 16);
3920
+ return fanout_add(sk, &args);
38803921 }
38813922 case PACKET_FANOUT_DATA:
38823923 {
....@@ -3886,13 +3927,27 @@
38863927
38873928 return fanout_set_data(po, optval, optlen);
38883929 }
3930
+ case PACKET_IGNORE_OUTGOING:
3931
+ {
3932
+ int val;
3933
+
3934
+ if (optlen != sizeof(val))
3935
+ return -EINVAL;
3936
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
3937
+ return -EFAULT;
3938
+ if (val < 0 || val > 1)
3939
+ return -EINVAL;
3940
+
3941
+ po->prot_hook.ignore_outgoing = !!val;
3942
+ return 0;
3943
+ }
38893944 case PACKET_TX_HAS_OFF:
38903945 {
38913946 unsigned int val;
38923947
38933948 if (optlen != sizeof(val))
38943949 return -EINVAL;
3895
- if (copy_from_user(&val, optval, sizeof(val)))
3950
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
38963951 return -EFAULT;
38973952
38983953 lock_sock(sk);
....@@ -3911,7 +3966,7 @@
39113966
39123967 if (optlen != sizeof(val))
39133968 return -EINVAL;
3914
- if (copy_from_user(&val, optval, sizeof(val)))
3969
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
39153970 return -EFAULT;
39163971
39173972 po->xmit = val ? packet_direct_xmit : dev_queue_xmit;
....@@ -3932,6 +3987,7 @@
39323987 void *data = &val;
39333988 union tpacket_stats_u st;
39343989 struct tpacket_rollover_stats rstats;
3990
+ int drops;
39353991
39363992 if (level != SOL_PACKET)
39373993 return -ENOPROTOOPT;
....@@ -3948,14 +4004,17 @@
39484004 memcpy(&st, &po->stats, sizeof(st));
39494005 memset(&po->stats, 0, sizeof(po->stats));
39504006 spin_unlock_bh(&sk->sk_receive_queue.lock);
4007
+ drops = atomic_xchg(&po->tp_drops, 0);
39514008
39524009 if (po->tp_version == TPACKET_V3) {
39534010 lv = sizeof(struct tpacket_stats_v3);
3954
- st.stats3.tp_packets += st.stats3.tp_drops;
4011
+ st.stats3.tp_drops = drops;
4012
+ st.stats3.tp_packets += drops;
39554013 data = &st.stats3;
39564014 } else {
39574015 lv = sizeof(struct tpacket_stats);
3958
- st.stats1.tp_packets += st.stats1.tp_drops;
4016
+ st.stats1.tp_drops = drops;
4017
+ st.stats1.tp_packets += drops;
39594018 data = &st.stats1;
39604019 }
39614020
....@@ -4009,6 +4068,9 @@
40094068 ((u32)po->fanout->flags << 24)) :
40104069 0);
40114070 break;
4071
+ case PACKET_IGNORE_OUTGOING:
4072
+ val = po->prot_hook.ignore_outgoing;
4073
+ break;
40124074 case PACKET_ROLLOVER_STATS:
40134075 if (!po->rollover)
40144076 return -EINVAL;
....@@ -4037,28 +4099,6 @@
40374099 return 0;
40384100 }
40394101
4040
-
4041
-#ifdef CONFIG_COMPAT
4042
-static int compat_packet_setsockopt(struct socket *sock, int level, int optname,
4043
- char __user *optval, unsigned int optlen)
4044
-{
4045
- struct packet_sock *po = pkt_sk(sock->sk);
4046
-
4047
- if (level != SOL_PACKET)
4048
- return -ENOPROTOOPT;
4049
-
4050
- if (optname == PACKET_FANOUT_DATA &&
4051
- po->fanout && po->fanout->type == PACKET_FANOUT_CBPF) {
4052
- optval = (char __user *)get_compat_bpf_fprog(optval);
4053
- if (!optval)
4054
- return -EFAULT;
4055
- optlen = sizeof(struct sock_fprog);
4056
- }
4057
-
4058
- return packet_setsockopt(sock, level, optname, optval, optlen);
4059
-}
4060
-#endif
4061
-
40624102 static int packet_notifier(struct notifier_block *this,
40634103 unsigned long msg, void *ptr)
40644104 {
....@@ -4074,7 +4114,7 @@
40744114 case NETDEV_UNREGISTER:
40754115 if (po->mclist)
40764116 packet_dev_mclist_delete(dev, &po->mclist);
4077
- /* fallthrough */
4117
+ fallthrough;
40784118
40794119 case NETDEV_DOWN:
40804120 if (dev->ifindex == po->ifindex) {
....@@ -4134,11 +4174,6 @@
41344174 spin_unlock_bh(&sk->sk_receive_queue.lock);
41354175 return put_user(amount, (int __user *)arg);
41364176 }
4137
- case SIOCGSTAMP:
4138
- return sock_get_timestamp(sk, (struct timeval __user *)arg);
4139
- case SIOCGSTAMPNS:
4140
- return sock_get_timestampns(sk, (struct timespec __user *)arg);
4141
-
41424177 #ifdef CONFIG_INET
41434178 case SIOCADDRT:
41444179 case SIOCDELRT:
....@@ -4176,8 +4211,7 @@
41764211 TP_STATUS_KERNEL))
41774212 mask |= EPOLLIN | EPOLLRDNORM;
41784213 }
4179
- if (po->pressure && __packet_rcv_has_room(po, NULL) == ROOM_NORMAL)
4180
- po->pressure = 0;
4214
+ packet_rcv_try_clear_pressure(po);
41814215 spin_unlock_bh(&sk->sk_receive_queue.lock);
41824216 spin_lock_bh(&sk->sk_write_queue.lock);
41834217 if (po->tx_ring.pg_vec) {
....@@ -4296,7 +4330,7 @@
42964330 struct packet_ring_buffer *rb;
42974331 struct sk_buff_head *rb_queue;
42984332 __be16 num;
4299
- int err = -EINVAL;
4333
+ int err;
43004334 /* Added to avoid minimal code churn */
43014335 struct tpacket_req *req = &req_u->req;
43024336
....@@ -4526,10 +4560,9 @@
45264560 .getname = packet_getname_spkt,
45274561 .poll = datagram_poll,
45284562 .ioctl = packet_ioctl,
4563
+ .gettstamp = sock_gettstamp,
45294564 .listen = sock_no_listen,
45304565 .shutdown = sock_no_shutdown,
4531
- .setsockopt = sock_no_setsockopt,
4532
- .getsockopt = sock_no_getsockopt,
45334566 .sendmsg = packet_sendmsg_spkt,
45344567 .recvmsg = packet_recvmsg,
45354568 .mmap = sock_no_mmap,
....@@ -4547,13 +4580,11 @@
45474580 .getname = packet_getname,
45484581 .poll = packet_poll,
45494582 .ioctl = packet_ioctl,
4583
+ .gettstamp = sock_gettstamp,
45504584 .listen = sock_no_listen,
45514585 .shutdown = sock_no_shutdown,
45524586 .setsockopt = packet_setsockopt,
45534587 .getsockopt = packet_getsockopt,
4554
-#ifdef CONFIG_COMPAT
4555
- .compat_setsockopt = compat_packet_setsockopt,
4556
-#endif
45574588 .sendmsg = packet_sendmsg,
45584589 .recvmsg = packet_recvmsg,
45594590 .mmap = packet_mmap,
....@@ -4630,9 +4661,11 @@
46304661 mutex_init(&net->packet.sklist_lock);
46314662 INIT_HLIST_HEAD(&net->packet.sklist);
46324663
4664
+#ifdef CONFIG_PROC_FS
46334665 if (!proc_create_net("packet", 0, net->proc_net, &packet_seq_ops,
46344666 sizeof(struct seq_net_private)))
46354667 return -ENOMEM;
4668
+#endif /* CONFIG_PROC_FS */
46364669
46374670 return 0;
46384671 }