.. | ..
    |   1 | +// SPDX-License-Identifier: GPL-2.0-or-later
  1 |   2 | /*
  2 |   3 |  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  3 |   4 |  *		operating system.  INET is implemented using the  BSD Socket
.. | ..
 43 |  44 |  *		Chetan Loke	:	Implemented TPACKET_V3 block abstraction
 44 |  45 |  *					layer.
 45 |  46 |  *					Copyright (C) 2011, <lokec@ccs.neu.edu>
 46 |     | - *
 47 |     | - *
 48 |     | - *		This program is free software; you can redistribute it and/or
 49 |     | - *		modify it under the terms of the GNU General Public License
 50 |     | - *		as published by the Free Software Foundation; either version
 51 |     | - *		2 of the License, or (at your option) any later version.
 52 |     | - *
 53 |  47 |  */
 54 |  48 |
 55 |  49 | #include <linux/types.h>
.. | ..
 63 |  57 | #include <linux/if_packet.h>
 64 |  58 | #include <linux/wireless.h>
 65 |  59 | #include <linux/kernel.h>
 66 |     | -#include <linux/delay.h>
 67 |  60 | #include <linux/kmod.h>
 68 |  61 | #include <linux/slab.h>
 69 |  62 | #include <linux/vmalloc.h>
.. | ..
100 |  93 |
101 |  94 | /*
102 |  95 |    Assumptions:
103 |     | -   - if device has no dev->hard_header routine, it adds and removes ll header
104 |     | -     inside itself. In this case ll header is invisible outside of device,
105 |     | -     but higher levels still should reserve dev->hard_header_len.
106 |     | -     Some devices are enough clever to reallocate skb, when header
107 |     | -     will not fit to reserved space (tunnel), another ones are silly
108 |     | -     (PPP).
    |  96 | +   - If the device has no dev->header_ops->create, there is no LL header
    |  97 | +     visible above the device. In this case, its hard_header_len should be 0.
    |  98 | +     The device may prepend its own header internally. In this case, its
    |  99 | +     needed_headroom should be set to the space needed for it to add its
    | 100 | +     internal header.
    | 101 | +     For example, a WiFi driver pretending to be an Ethernet driver should
    | 102 | +     set its hard_header_len to be the Ethernet header length, and set its
    | 103 | +     needed_headroom to be (the real WiFi header length - the fake Ethernet
    | 104 | +     header length).
109 | 105 |    - packet socket receives packets with pulled ll header,
110 | 106 |      so that SOCK_RAW should push it back.
111 | 107 |
112 | 108 | On receive:
113 | 109 | -----------
114 | 110 |
115 |     | -Incoming, dev->hard_header!=NULL
    | 111 | +Incoming, dev_has_header(dev) == true
116 | 112 |    mac_header -> ll header
117 | 113 |    data       -> data
118 | 114 |
119 |     | -Outgoing, dev->hard_header!=NULL
    | 115 | +Outgoing, dev_has_header(dev) == true
120 | 116 |    mac_header -> ll header
121 | 117 |    data       -> ll header
122 | 118 |
123 |     | -Incoming, dev->hard_header==NULL
124 |     | -   mac_header -> UNKNOWN position. It is very likely, that it points to ll
125 |     | -		 header.  PPP makes it, that is wrong, because introduce
126 |     | -		 assymetry between rx and tx paths.
    | 119 | +Incoming, dev_has_header(dev) == false
    | 120 | +   mac_header -> data
    | 121 | +     However drivers often make it point to the ll header.
    | 122 | +     This is incorrect because the ll header should be invisible to us.
127 | 123 |    data       -> data
128 | 124 |
129 |     | -Outgoing, dev->hard_header==NULL
130 |     | -   mac_header -> data. ll header is still not built!
    | 125 | +Outgoing, dev_has_header(dev) == false
    | 126 | +   mac_header -> data. ll header is invisible to us.
131 | 127 |    data       -> data
132 | 128 |
133 | 129 |   Resume
134 |     | -  If dev->hard_header==NULL we are unlikely to restore sensible ll header.
    | 130 | +  If dev_has_header(dev) == false we are unable to restore the ll header,
    | 131 | +    because it is invisible to us.
135 | 132 |
136 | 133 |
137 | 134 | On transmit:
138 | 135 | ------------
139 | 136 |
140 |     | -dev->hard_header != NULL
    | 137 | +dev->header_ops != NULL
141 | 138 |    mac_header -> ll header
142 | 139 |    data       -> ll header
143 | 140 |
144 |     | -dev->hard_header == NULL (ll header is added by device, we cannot control it)
    | 141 | +dev->header_ops == NULL (ll header is invisible to us)
145 | 142 |    mac_header -> data
146 | 143 |    data       -> data
147 | 144 |
148 |     | -   We should set nh.raw on output to correct posistion,
    | 145 | +   We should set network_header on output to the correct position,
149 | 146 |    packet classifier depends on it.
150 | 147 |  */
151 | 148 |
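The WiFi-over-Ethernet example in the new comment can be made concrete. Below is a minimal, hypothetical driver setup sketch; the device setup function and the 30-byte header constant are invented for illustration, only the two net_device fields from the comment are the point.

```c
#include <linux/etherdevice.h>
#include <linux/netdevice.h>

/* Hypothetical 802.11 data-header size, used only for illustration. */
#define FAKE_WIFI_HDR_LEN 30

static void fake_wifi_setup(struct net_device *dev)
{
	ether_setup(dev);	/* pretend to be Ethernet: type, MTU, ... */

	/* The LL header exposed to upper layers is the fake Ethernet one. */
	dev->hard_header_len = ETH_HLEN;

	/* Extra room the driver needs to build its real, larger header
	 * internally: real WiFi header length minus fake Ethernet length,
	 * exactly as the comment above describes.
	 */
	dev->needed_headroom = FAKE_WIFI_HDR_LEN - ETH_HLEN;
}
```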
.. | .. |
---|
184 | 181 | #define BLOCK_LEN(x) ((x)->hdr.bh1.blk_len) |
---|
185 | 182 | #define BLOCK_SNUM(x) ((x)->hdr.bh1.seq_num) |
---|
186 | 183 | #define BLOCK_O2PRIV(x) ((x)->offset_to_priv) |
---|
187 | | -#define BLOCK_PRIV(x) ((void *)((char *)(x) + BLOCK_O2PRIV(x))) |
---|
188 | 184 | |
---|
189 | 185 | struct packet_sock; |
---|
190 | 186 | static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, |
---|
.. | .. |
---|
273 | 269 | |
---|
274 | 270 | static bool packet_use_direct_xmit(const struct packet_sock *po) |
---|
275 | 271 | { |
---|
276 | | - return po->xmit == packet_direct_xmit; |
---|
277 | | -} |
---|
278 | | - |
---|
279 | | -static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb, |
---|
280 | | - struct net_device *sb_dev) |
---|
281 | | -{ |
---|
282 | | - return dev_pick_tx_cpu_id(dev, skb, sb_dev, NULL); |
---|
| 272 | + /* Paired with WRITE_ONCE() in packet_setsockopt() */ |
---|
| 273 | + return READ_ONCE(po->xmit) == packet_direct_xmit; |
---|
283 | 274 | } |
---|
284 | 275 | |
---|
285 | 276 | static u16 packet_pick_tx_queue(struct sk_buff *skb) |
---|
286 | 277 | { |
---|
287 | 278 | struct net_device *dev = skb->dev; |
---|
288 | 279 | const struct net_device_ops *ops = dev->netdev_ops; |
---|
| 280 | + int cpu = raw_smp_processor_id(); |
---|
289 | 281 | u16 queue_index; |
---|
290 | 282 | |
---|
| 283 | +#ifdef CONFIG_XPS |
---|
| 284 | + skb->sender_cpu = cpu + 1; |
---|
| 285 | +#endif |
---|
| 286 | + skb_record_rx_queue(skb, cpu % dev->real_num_tx_queues); |
---|
291 | 287 | if (ops->ndo_select_queue) { |
---|
292 | | - queue_index = ops->ndo_select_queue(dev, skb, NULL, |
---|
293 | | - __packet_pick_tx_queue); |
---|
| 288 | + queue_index = ops->ndo_select_queue(dev, skb, NULL); |
---|
294 | 289 | queue_index = netdev_cap_txqueue(dev, queue_index); |
---|
295 | 290 | } else { |
---|
296 | | - queue_index = __packet_pick_tx_queue(dev, skb, NULL); |
---|
| 291 | + queue_index = netdev_pick_tx(dev, skb, NULL); |
---|
297 | 292 | } |
---|
298 | 293 | |
---|
299 | 294 | return queue_index; |
---|
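The READ_ONCE() added here is paired with a WRITE_ONCE() in packet_setsockopt() (see the PACKET_QDISC_BYPASS hunk further down): po->xmit can be rewritten while other CPUs read it on the transmit path, so both sides need marked accesses. A rough, runnable userspace analogue of that pairing, using C11 relaxed atomics in place of the kernel macros (all names invented), is:

```c
#include <stdatomic.h>
#include <stdio.h>

typedef int (*xmit_fn)(const char *pkt);

static int direct_xmit(const char *pkt) { return printf("direct: %s\n", pkt); }
static int queued_xmit(const char *pkt) { return printf("queued: %s\n", pkt); }

/* Stand-in for po->xmit: written by setsockopt(), read locklessly on send. */
static _Atomic(xmit_fn) po_xmit = queued_xmit;

static int use_direct_xmit(void)
{
	/* Analogue of READ_ONCE(po->xmit) == packet_direct_xmit */
	return atomic_load_explicit(&po_xmit, memory_order_relaxed) == direct_xmit;
}

static void set_qdisc_bypass(int on)
{
	/* Analogue of WRITE_ONCE(po->xmit, ...) */
	atomic_store_explicit(&po_xmit, on ? direct_xmit : queued_xmit,
			      memory_order_relaxed);
}

int main(void)
{
	set_qdisc_bypass(1);
	printf("direct? %d\n", use_direct_xmit());
	return 0;
}
```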
.. | .. |
---|
371 | 366 | { |
---|
372 | 367 | union tpacket_uhdr h; |
---|
373 | 368 | |
---|
| 369 | + /* WRITE_ONCE() are paired with READ_ONCE() in __packet_get_status */ |
---|
| 370 | + |
---|
374 | 371 | h.raw = frame; |
---|
375 | 372 | switch (po->tp_version) { |
---|
376 | 373 | case TPACKET_V1: |
---|
377 | | - h.h1->tp_status = status; |
---|
| 374 | + WRITE_ONCE(h.h1->tp_status, status); |
---|
378 | 375 | flush_dcache_page(pgv_to_page(&h.h1->tp_status)); |
---|
379 | 376 | break; |
---|
380 | 377 | case TPACKET_V2: |
---|
381 | | - h.h2->tp_status = status; |
---|
| 378 | + WRITE_ONCE(h.h2->tp_status, status); |
---|
382 | 379 | flush_dcache_page(pgv_to_page(&h.h2->tp_status)); |
---|
383 | 380 | break; |
---|
384 | 381 | case TPACKET_V3: |
---|
385 | | - h.h3->tp_status = status; |
---|
| 382 | + WRITE_ONCE(h.h3->tp_status, status); |
---|
386 | 383 | flush_dcache_page(pgv_to_page(&h.h3->tp_status)); |
---|
387 | 384 | break; |
---|
388 | 385 | default: |
---|
.. | .. |
---|
393 | 390 | smp_wmb(); |
---|
394 | 391 | } |
---|
395 | 392 | |
---|
396 | | -static int __packet_get_status(struct packet_sock *po, void *frame) |
---|
| 393 | +static int __packet_get_status(const struct packet_sock *po, void *frame) |
---|
397 | 394 | { |
---|
398 | 395 | union tpacket_uhdr h; |
---|
399 | 396 | |
---|
400 | 397 | smp_rmb(); |
---|
401 | 398 | |
---|
| 399 | + /* READ_ONCE() are paired with WRITE_ONCE() in __packet_set_status */ |
---|
| 400 | + |
---|
402 | 401 | h.raw = frame; |
---|
403 | 402 | switch (po->tp_version) { |
---|
404 | 403 | case TPACKET_V1: |
---|
405 | 404 | flush_dcache_page(pgv_to_page(&h.h1->tp_status)); |
---|
406 | | - return h.h1->tp_status; |
---|
| 405 | + return READ_ONCE(h.h1->tp_status); |
---|
407 | 406 | case TPACKET_V2: |
---|
408 | 407 | flush_dcache_page(pgv_to_page(&h.h2->tp_status)); |
---|
409 | | - return h.h2->tp_status; |
---|
| 408 | + return READ_ONCE(h.h2->tp_status); |
---|
410 | 409 | case TPACKET_V3: |
---|
411 | 410 | flush_dcache_page(pgv_to_page(&h.h3->tp_status)); |
---|
412 | | - return h.h3->tp_status; |
---|
| 411 | + return READ_ONCE(h.h3->tp_status); |
---|
413 | 412 | default: |
---|
414 | 413 | WARN(1, "TPACKET version not supported.\n"); |
---|
415 | 414 | BUG(); |
---|
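The WRITE_ONCE()/READ_ONCE() on tp_status matter because the other side of this handshake is userspace, reading and writing the same word through the mmap()ed ring. A minimal TPACKET_V2 reader is sketched below under stated assumptions: arbitrary block/frame sizes, no error handling, CAP_NET_RAW required, and GCC __atomic builtins standing in for the kernel macros.

```c
#include <linux/if_packet.h>
#include <linux/if_ether.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <sys/mman.h>
#include <stdio.h>
#include <poll.h>

int main(void)
{
	int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	int ver = TPACKET_V2;
	struct tpacket_req req = {
		.tp_block_size = 1 << 16,
		.tp_block_nr   = 4,
		.tp_frame_size = 1 << 11,
		.tp_frame_nr   = (1 << 16) / (1 << 11) * 4,
	};

	setsockopt(fd, SOL_PACKET, PACKET_VERSION, &ver, sizeof(ver));
	setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));

	unsigned char *ring = mmap(NULL, (size_t)req.tp_block_size * req.tp_block_nr,
				   PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	unsigned int idx = 0;

	for (;;) {
		struct tpacket2_hdr *hdr =
			(void *)(ring + (size_t)idx * req.tp_frame_size);

		/* Races with the kernel's WRITE_ONCE(h.h2->tp_status, ...);
		 * read it once, with acquire semantics. */
		while (!(__atomic_load_n(&hdr->tp_status, __ATOMIC_ACQUIRE)
			 & TP_STATUS_USER)) {
			struct pollfd pfd = { .fd = fd, .events = POLLIN };
			poll(&pfd, 1, -1);
		}

		printf("frame %u: %u bytes\n", idx, hdr->tp_len);

		/* Hand the slot back; pairs with the kernel's READ_ONCE(). */
		__atomic_store_n(&hdr->tp_status, TP_STATUS_KERNEL,
				 __ATOMIC_RELEASE);
		idx = (idx + 1) % req.tp_frame_nr;
	}
}
```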
.. | .. |
---|
417 | 416 | } |
---|
418 | 417 | } |
---|
419 | 418 | |
---|
420 | | -static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts, |
---|
| 419 | +static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec64 *ts, |
---|
421 | 420 | unsigned int flags) |
---|
422 | 421 | { |
---|
423 | 422 | struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); |
---|
424 | 423 | |
---|
425 | 424 | if (shhwtstamps && |
---|
426 | 425 | (flags & SOF_TIMESTAMPING_RAW_HARDWARE) && |
---|
427 | | - ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts)) |
---|
| 426 | + ktime_to_timespec64_cond(shhwtstamps->hwtstamp, ts)) |
---|
428 | 427 | return TP_STATUS_TS_RAW_HARDWARE; |
---|
429 | 428 | |
---|
430 | | - if (ktime_to_timespec_cond(skb->tstamp, ts)) |
---|
| 429 | + if ((flags & SOF_TIMESTAMPING_SOFTWARE) && |
---|
| 430 | + ktime_to_timespec64_cond(skb->tstamp, ts)) |
---|
431 | 431 | return TP_STATUS_TS_SOFTWARE; |
---|
432 | 432 | |
---|
433 | 433 | return 0; |
---|
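The flags argument here comes from the PACKET_TIMESTAMP socket option (po->tp_tstamp), and the change makes the software fallback conditional on SOF_TIMESTAMPING_SOFTWARE. A short userspace sketch of requesting hardware timestamps with a software fallback for the ring, assuming fd is an AF_PACKET socket:

```c
#include <linux/if_packet.h>
#include <linux/net_tstamp.h>
#include <sys/socket.h>

/* Prefer NIC hardware timestamps, fall back to software timestamps. */
static int enable_ring_timestamps(int fd)
{
	int req = SOF_TIMESTAMPING_RAW_HARDWARE | SOF_TIMESTAMPING_SOFTWARE;

	return setsockopt(fd, SOL_PACKET, PACKET_TIMESTAMP, &req, sizeof(req));
}
```

The returned ts_status bits (TP_STATUS_TS_RAW_HARDWARE or TP_STATUS_TS_SOFTWARE) are OR-ed into the frame's tp_status, so userspace can tell which clock filled tp_sec/tp_nsec.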
.. | .. |
---|
437 | 437 | struct sk_buff *skb) |
---|
438 | 438 | { |
---|
439 | 439 | union tpacket_uhdr h; |
---|
440 | | - struct timespec ts; |
---|
| 440 | + struct timespec64 ts; |
---|
441 | 441 | __u32 ts_status; |
---|
442 | 442 | |
---|
443 | 443 | if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp))) |
---|
444 | 444 | return 0; |
---|
445 | 445 | |
---|
446 | 446 | h.raw = frame; |
---|
| 447 | + /* |
---|
| 448 | + * versions 1 through 3 overflow the timestamps in y2106, since they |
---|
| 449 | + * all store the seconds in a 32-bit unsigned integer. |
---|
| 450 | + * If we create a version 4, that should have a 64-bit timestamp, |
---|
| 451 | + * either 64-bit seconds + 32-bit nanoseconds, or just 64-bit |
---|
| 452 | + * nanoseconds. |
---|
| 453 | + */ |
---|
447 | 454 | switch (po->tp_version) { |
---|
448 | 455 | case TPACKET_V1: |
---|
449 | 456 | h.h1->tp_sec = ts.tv_sec; |
---|
.. | .. |
---|
469 | 476 | return ts_status; |
---|
470 | 477 | } |
---|
471 | 478 | |
---|
472 | | -static void *packet_lookup_frame(struct packet_sock *po, |
---|
473 | | - struct packet_ring_buffer *rb, |
---|
474 | | - unsigned int position, |
---|
475 | | - int status) |
---|
| 479 | +static void *packet_lookup_frame(const struct packet_sock *po, |
---|
| 480 | + const struct packet_ring_buffer *rb, |
---|
| 481 | + unsigned int position, |
---|
| 482 | + int status) |
---|
476 | 483 | { |
---|
477 | 484 | unsigned int pg_vec_pos, frame_offset; |
---|
478 | 485 | union tpacket_uhdr h; |
---|
.. | .. |
---|
529 | 536 | int blk_size_in_bytes) |
---|
530 | 537 | { |
---|
531 | 538 | struct net_device *dev; |
---|
532 | | - unsigned int mbits = 0, msec = 0, div = 0, tmo = 0; |
---|
| 539 | + unsigned int mbits, div; |
---|
533 | 540 | struct ethtool_link_ksettings ecmd; |
---|
534 | 541 | int err; |
---|
535 | 542 | |
---|
.. | .. |
---|
541 | 548 | } |
---|
542 | 549 | err = __ethtool_get_link_ksettings(dev, &ecmd); |
---|
543 | 550 | rtnl_unlock(); |
---|
544 | | - if (!err) { |
---|
545 | | - /* |
---|
546 | | - * If the link speed is so slow you don't really |
---|
547 | | - * need to worry about perf anyways |
---|
548 | | - */ |
---|
549 | | - if (ecmd.base.speed < SPEED_1000 || |
---|
550 | | - ecmd.base.speed == SPEED_UNKNOWN) { |
---|
551 | | - return DEFAULT_PRB_RETIRE_TOV; |
---|
552 | | - } else { |
---|
553 | | - msec = 1; |
---|
554 | | - div = ecmd.base.speed / 1000; |
---|
555 | | - } |
---|
556 | | - } else |
---|
| 551 | + if (err) |
---|
557 | 552 | return DEFAULT_PRB_RETIRE_TOV; |
---|
558 | 553 | |
---|
| 554 | + /* If the link speed is so slow you don't really |
---|
| 555 | + * need to worry about perf anyways |
---|
| 556 | + */ |
---|
| 557 | + if (ecmd.base.speed < SPEED_1000 || |
---|
| 558 | + ecmd.base.speed == SPEED_UNKNOWN) |
---|
| 559 | + return DEFAULT_PRB_RETIRE_TOV; |
---|
| 560 | + |
---|
| 561 | + div = ecmd.base.speed / 1000; |
---|
559 | 562 | mbits = (blk_size_in_bytes * 8) / (1024 * 1024); |
---|
560 | 563 | |
---|
561 | 564 | if (div) |
---|
562 | 565 | mbits /= div; |
---|
563 | 566 | |
---|
564 | | - tmo = mbits * msec; |
---|
565 | | - |
---|
566 | 567 | if (div) |
---|
567 | | - return tmo+1; |
---|
568 | | - return tmo; |
---|
| 568 | + return mbits + 1; |
---|
| 569 | + return mbits; |
---|
569 | 570 | } |
---|
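The rewritten prb_calc_retire_blk_tmo() reduces to "block size in megabits, divided by link speed in gigabits per second, plus one millisecond". A standalone recomputation of that formula, with one worked case in the comments (speed values hard-coded rather than read via ethtool), is:

```c
#include <stdio.h>

#define DEFAULT_PRB_RETIRE_TOV 8	/* ms, same fallback as af_packet.c */

/* Mirror of the simplified calculation: speed in Mbit/s, block size in bytes. */
static unsigned int retire_blk_tmo(unsigned int speed_mbps, unsigned int blk_bytes)
{
	unsigned int mbits, div;

	if (speed_mbps < 1000)			/* slow or unknown link */
		return DEFAULT_PRB_RETIRE_TOV;

	div   = speed_mbps / 1000;		/* link speed in Gbit/s */
	mbits = (blk_bytes * 8) / (1024 * 1024);/* block size in Mbit */
	if (div)
		mbits /= div;			/* ~ms needed to fill one block */
	return div ? mbits + 1 : mbits;
}

int main(void)
{
	/* 4 MiB block on a 10 Gbit/s link: 32 Mbit / 10 = 3, +1 -> 4 ms */
	printf("%u ms\n", retire_blk_tmo(10000, 4 * 1024 * 1024));
	return 0;
}
```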
570 | 571 | |
---|
571 | 572 | static void prb_init_ft_ops(struct tpacket_kbdq_core *p1, |
---|
.. | .. |
---|
601 | 602 | req_u->req3.tp_block_size); |
---|
602 | 603 | p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov); |
---|
603 | 604 | p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv; |
---|
| 605 | + rwlock_init(&p1->blk_fill_in_prog_lock); |
---|
604 | 606 | |
---|
605 | 607 | p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv); |
---|
606 | 608 | prb_init_ft_ops(p1, req_u); |
---|
.. | .. |
---|
667 | 669 | * |
---|
668 | 670 | */ |
---|
669 | 671 | if (BLOCK_NUM_PKTS(pbd)) { |
---|
670 | | - while (atomic_read(&pkc->blk_fill_in_prog)) { |
---|
671 | | - /* Waiting for skb_copy_bits to finish... */ |
---|
672 | | - cpu_chill(); |
---|
673 | | - } |
---|
| 672 | + /* Waiting for skb_copy_bits to finish... */ |
---|
| 673 | + write_lock(&pkc->blk_fill_in_prog_lock); |
---|
| 674 | + write_unlock(&pkc->blk_fill_in_prog_lock); |
---|
674 | 675 | } |
---|
675 | 676 | |
---|
676 | 677 | if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) { |
---|
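The cpu_chill() busy-wait on an atomic counter is replaced by a reader/writer lock used purely as a drain barrier: fillers hold it for read while copying into the block, and the retire path takes and immediately drops it for write, which cannot succeed until every in-flight filler has finished. A userspace pthread analogue of that idiom (names invented) looks like this:

```c
#include <pthread.h>

static pthread_rwlock_t blk_fill_in_prog = PTHREAD_RWLOCK_INITIALIZER;

/* Filler side: analogue of prb_fill_curr_block()/prb_clear_blk_fill_status() */
static void fill_block(void)
{
	pthread_rwlock_rdlock(&blk_fill_in_prog);
	/* ... copy packet data into the current block ... */
	pthread_rwlock_unlock(&blk_fill_in_prog);
}

/* Retire side: analogue of the write_lock()/write_unlock() pair above. */
static void wait_for_fillers(void)
{
	/* Blocks until all current readers have dropped the lock, then
	 * releases immediately: a pure drain barrier, nothing is protected
	 * by the (empty) write-side critical section itself. */
	pthread_rwlock_wrlock(&blk_fill_in_prog);
	pthread_rwlock_unlock(&blk_fill_in_prog);
}
```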
.. | .. |
---|
768 | 769 | struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1; |
---|
769 | 770 | struct sock *sk = &po->sk; |
---|
770 | 771 | |
---|
771 | | - if (po->stats.stats3.tp_drops) |
---|
| 772 | + if (atomic_read(&po->tp_drops)) |
---|
772 | 773 | status |= TP_STATUS_LOSING; |
---|
773 | 774 | |
---|
774 | 775 | last_pkt = (struct tpacket3_hdr *)pkc1->prev; |
---|
.. | .. |
---|
784 | 785 | * It shouldn't really happen as we don't close empty |
---|
785 | 786 | * blocks. See prb_retire_rx_blk_timer_expired(). |
---|
786 | 787 | */ |
---|
787 | | - struct timespec ts; |
---|
788 | | - getnstimeofday(&ts); |
---|
| 788 | + struct timespec64 ts; |
---|
| 789 | + ktime_get_real_ts64(&ts); |
---|
789 | 790 | h1->ts_last_pkt.ts_sec = ts.tv_sec; |
---|
790 | 791 | h1->ts_last_pkt.ts_nsec = ts.tv_nsec; |
---|
791 | 792 | } |
---|
.. | .. |
---|
815 | 816 | static void prb_open_block(struct tpacket_kbdq_core *pkc1, |
---|
816 | 817 | struct tpacket_block_desc *pbd1) |
---|
817 | 818 | { |
---|
818 | | - struct timespec ts; |
---|
| 819 | + struct timespec64 ts; |
---|
819 | 820 | struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1; |
---|
820 | 821 | |
---|
821 | 822 | smp_rmb(); |
---|
.. | .. |
---|
828 | 829 | BLOCK_NUM_PKTS(pbd1) = 0; |
---|
829 | 830 | BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); |
---|
830 | 831 | |
---|
831 | | - getnstimeofday(&ts); |
---|
| 832 | + ktime_get_real_ts64(&ts); |
---|
832 | 833 | |
---|
833 | 834 | h1->ts_first_pkt.ts_sec = ts.tv_sec; |
---|
834 | 835 | h1->ts_first_pkt.ts_nsec = ts.tv_nsec; |
---|
.. | .. |
---|
929 | 930 | * the timer-handler already handled this case. |
---|
930 | 931 | */ |
---|
931 | 932 | if (!(status & TP_STATUS_BLK_TMO)) { |
---|
932 | | - while (atomic_read(&pkc->blk_fill_in_prog)) { |
---|
933 | | - /* Waiting for skb_copy_bits to finish... */ |
---|
934 | | - cpu_chill(); |
---|
935 | | - } |
---|
| 933 | + /* Waiting for skb_copy_bits to finish... */ |
---|
| 934 | + write_lock(&pkc->blk_fill_in_prog_lock); |
---|
| 935 | + write_unlock(&pkc->blk_fill_in_prog_lock); |
---|
936 | 936 | } |
---|
937 | 937 | prb_close_block(pkc, pbd, po, status); |
---|
938 | 938 | return; |
---|
.. | .. |
---|
953 | 953 | __releases(&pkc->blk_fill_in_prog_lock) |
---|
954 | 954 | { |
---|
955 | 955 | struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb); |
---|
956 | | - atomic_dec(&pkc->blk_fill_in_prog); |
---|
| 956 | + |
---|
| 957 | + read_unlock(&pkc->blk_fill_in_prog_lock); |
---|
957 | 958 | } |
---|
958 | 959 | |
---|
959 | 960 | static void prb_fill_rxhash(struct tpacket_kbdq_core *pkc, |
---|
.. | .. |
---|
1008 | 1009 | pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len); |
---|
1009 | 1010 | BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len); |
---|
1010 | 1011 | BLOCK_NUM_PKTS(pbd) += 1; |
---|
1011 | | - atomic_inc(&pkc->blk_fill_in_prog); |
---|
| 1012 | + read_lock(&pkc->blk_fill_in_prog_lock); |
---|
1012 | 1013 | prb_run_all_ft_ops(pkc, ppd); |
---|
1013 | 1014 | } |
---|
1014 | 1015 | |
---|
1015 | 1016 | /* Assumes caller has the sk->rx_queue.lock */ |
---|
1016 | 1017 | static void *__packet_lookup_frame_in_block(struct packet_sock *po, |
---|
1017 | 1018 | struct sk_buff *skb, |
---|
1018 | | - int status, |
---|
1019 | 1019 | unsigned int len |
---|
1020 | 1020 | ) |
---|
1021 | 1021 | { |
---|
.. | .. |
---|
1087 | 1087 | po->rx_ring.head, status); |
---|
1088 | 1088 | return curr; |
---|
1089 | 1089 | case TPACKET_V3: |
---|
1090 | | - return __packet_lookup_frame_in_block(po, skb, status, len); |
---|
| 1090 | + return __packet_lookup_frame_in_block(po, skb, len); |
---|
1091 | 1091 | default: |
---|
1092 | 1092 | WARN(1, "TPACKET version not supported\n"); |
---|
1093 | 1093 | BUG(); |
---|
.. | .. |
---|
1095 | 1095 | } |
---|
1096 | 1096 | } |
---|
1097 | 1097 | |
---|
1098 | | -static void *prb_lookup_block(struct packet_sock *po, |
---|
1099 | | - struct packet_ring_buffer *rb, |
---|
1100 | | - unsigned int idx, |
---|
1101 | | - int status) |
---|
| 1098 | +static void *prb_lookup_block(const struct packet_sock *po, |
---|
| 1099 | + const struct packet_ring_buffer *rb, |
---|
| 1100 | + unsigned int idx, |
---|
| 1101 | + int status) |
---|
1102 | 1102 | { |
---|
1103 | 1103 | struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb); |
---|
1104 | 1104 | struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, idx); |
---|
.. | .. |
---|
1211 | 1211 | #define ROOM_LOW 0x1 |
---|
1212 | 1212 | #define ROOM_NORMAL 0x2 |
---|
1213 | 1213 | |
---|
1214 | | -static bool __tpacket_has_room(struct packet_sock *po, int pow_off) |
---|
| 1214 | +static bool __tpacket_has_room(const struct packet_sock *po, int pow_off) |
---|
1215 | 1215 | { |
---|
1216 | 1216 | int idx, len; |
---|
1217 | 1217 | |
---|
1218 | | - len = po->rx_ring.frame_max + 1; |
---|
1219 | | - idx = po->rx_ring.head; |
---|
| 1218 | + len = READ_ONCE(po->rx_ring.frame_max) + 1; |
---|
| 1219 | + idx = READ_ONCE(po->rx_ring.head); |
---|
1220 | 1220 | if (pow_off) |
---|
1221 | 1221 | idx += len >> pow_off; |
---|
1222 | 1222 | if (idx >= len) |
---|
.. | .. |
---|
1224 | 1224 | return packet_lookup_frame(po, &po->rx_ring, idx, TP_STATUS_KERNEL); |
---|
1225 | 1225 | } |
---|
1226 | 1226 | |
---|
1227 | | -static bool __tpacket_v3_has_room(struct packet_sock *po, int pow_off) |
---|
| 1227 | +static bool __tpacket_v3_has_room(const struct packet_sock *po, int pow_off) |
---|
1228 | 1228 | { |
---|
1229 | 1229 | int idx, len; |
---|
1230 | 1230 | |
---|
1231 | | - len = po->rx_ring.prb_bdqc.knum_blocks; |
---|
1232 | | - idx = po->rx_ring.prb_bdqc.kactive_blk_num; |
---|
| 1231 | + len = READ_ONCE(po->rx_ring.prb_bdqc.knum_blocks); |
---|
| 1232 | + idx = READ_ONCE(po->rx_ring.prb_bdqc.kactive_blk_num); |
---|
1233 | 1233 | if (pow_off) |
---|
1234 | 1234 | idx += len >> pow_off; |
---|
1235 | 1235 | if (idx >= len) |
---|
.. | .. |
---|
1237 | 1237 | return prb_lookup_block(po, &po->rx_ring, idx, TP_STATUS_KERNEL); |
---|
1238 | 1238 | } |
---|
1239 | 1239 | |
---|
1240 | | -static int __packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb) |
---|
| 1240 | +static int __packet_rcv_has_room(const struct packet_sock *po, |
---|
| 1241 | + const struct sk_buff *skb) |
---|
1241 | 1242 | { |
---|
1242 | | - struct sock *sk = &po->sk; |
---|
| 1243 | + const struct sock *sk = &po->sk; |
---|
1243 | 1244 | int ret = ROOM_NONE; |
---|
1244 | 1245 | |
---|
1245 | 1246 | if (po->prot_hook.func != tpacket_rcv) { |
---|
1246 | | - int avail = sk->sk_rcvbuf - atomic_read(&sk->sk_rmem_alloc) |
---|
1247 | | - - (skb ? skb->truesize : 0); |
---|
1248 | | - if (avail > (sk->sk_rcvbuf >> ROOM_POW_OFF)) |
---|
| 1247 | + int rcvbuf = READ_ONCE(sk->sk_rcvbuf); |
---|
| 1248 | + int avail = rcvbuf - atomic_read(&sk->sk_rmem_alloc) |
---|
| 1249 | + - (skb ? skb->truesize : 0); |
---|
| 1250 | + |
---|
| 1251 | + if (avail > (rcvbuf >> ROOM_POW_OFF)) |
---|
1249 | 1252 | return ROOM_NORMAL; |
---|
1250 | 1253 | else if (avail > 0) |
---|
1251 | 1254 | return ROOM_LOW; |
---|
.. | .. |
---|
1270 | 1273 | |
---|
1271 | 1274 | static int packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb) |
---|
1272 | 1275 | { |
---|
1273 | | - int ret; |
---|
1274 | | - bool has_room; |
---|
| 1276 | + int pressure, ret; |
---|
1275 | 1277 | |
---|
1276 | | - spin_lock_bh(&po->sk.sk_receive_queue.lock); |
---|
1277 | 1278 | ret = __packet_rcv_has_room(po, skb); |
---|
1278 | | - has_room = ret == ROOM_NORMAL; |
---|
1279 | | - if (po->pressure == has_room) |
---|
1280 | | - po->pressure = !has_room; |
---|
1281 | | - spin_unlock_bh(&po->sk.sk_receive_queue.lock); |
---|
| 1279 | + pressure = ret != ROOM_NORMAL; |
---|
| 1280 | + |
---|
| 1281 | + if (READ_ONCE(po->pressure) != pressure) |
---|
| 1282 | + WRITE_ONCE(po->pressure, pressure); |
---|
1282 | 1283 | |
---|
1283 | 1284 | return ret; |
---|
| 1285 | +} |
---|
| 1286 | + |
---|
| 1287 | +static void packet_rcv_try_clear_pressure(struct packet_sock *po) |
---|
| 1288 | +{ |
---|
| 1289 | + if (READ_ONCE(po->pressure) && |
---|
| 1290 | + __packet_rcv_has_room(po, NULL) == ROOM_NORMAL) |
---|
| 1291 | + WRITE_ONCE(po->pressure, 0); |
---|
1284 | 1292 | } |
---|
1285 | 1293 | |
---|
1286 | 1294 | static void packet_sock_destruct(struct sock *sk) |
---|
.. | .. |
---|
1356 | 1364 | struct packet_sock *po, *po_next, *po_skip = NULL; |
---|
1357 | 1365 | unsigned int i, j, room = ROOM_NONE; |
---|
1358 | 1366 | |
---|
1359 | | - po = pkt_sk(f->arr[idx]); |
---|
| 1367 | + po = pkt_sk(rcu_dereference(f->arr[idx])); |
---|
1360 | 1368 | |
---|
1361 | 1369 | if (try_self) { |
---|
1362 | 1370 | room = packet_rcv_has_room(po, skb); |
---|
.. | .. |
---|
1368 | 1376 | |
---|
1369 | 1377 | i = j = min_t(int, po->rollover->sock, num - 1); |
---|
1370 | 1378 | do { |
---|
1371 | | - po_next = pkt_sk(f->arr[i]); |
---|
1372 | | - if (po_next != po_skip && !po_next->pressure && |
---|
| 1379 | + po_next = pkt_sk(rcu_dereference(f->arr[i])); |
---|
| 1380 | + if (po_next != po_skip && !READ_ONCE(po_next->pressure) && |
---|
1373 | 1381 | packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) { |
---|
1374 | 1382 | if (i != j) |
---|
1375 | 1383 | po->rollover->sock = i; |
---|
.. | .. |
---|
1463 | 1471 | if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER)) |
---|
1464 | 1472 | idx = fanout_demux_rollover(f, skb, idx, true, num); |
---|
1465 | 1473 | |
---|
1466 | | - po = pkt_sk(f->arr[idx]); |
---|
| 1474 | + po = pkt_sk(rcu_dereference(f->arr[idx])); |
---|
1467 | 1475 | return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev); |
---|
1468 | 1476 | } |
---|
1469 | 1477 | |
---|
.. | .. |
---|
1477 | 1485 | struct packet_fanout *f = po->fanout; |
---|
1478 | 1486 | |
---|
1479 | 1487 | spin_lock(&f->lock); |
---|
1480 | | - f->arr[f->num_members] = sk; |
---|
| 1488 | + rcu_assign_pointer(f->arr[f->num_members], sk); |
---|
1481 | 1489 | smp_wmb(); |
---|
1482 | 1490 | f->num_members++; |
---|
1483 | 1491 | if (f->num_members == 1) |
---|
.. | .. |
---|
1492 | 1500 | |
---|
1493 | 1501 | spin_lock(&f->lock); |
---|
1494 | 1502 | for (i = 0; i < f->num_members; i++) { |
---|
1495 | | - if (f->arr[i] == sk) |
---|
| 1503 | + if (rcu_dereference_protected(f->arr[i], |
---|
| 1504 | + lockdep_is_held(&f->lock)) == sk) |
---|
1496 | 1505 | break; |
---|
1497 | 1506 | } |
---|
1498 | 1507 | BUG_ON(i >= f->num_members); |
---|
1499 | | - f->arr[i] = f->arr[f->num_members - 1]; |
---|
| 1508 | + rcu_assign_pointer(f->arr[i], |
---|
| 1509 | + rcu_dereference_protected(f->arr[f->num_members - 1], |
---|
| 1510 | + lockdep_is_held(&f->lock))); |
---|
1500 | 1511 | f->num_members--; |
---|
1501 | 1512 | if (f->num_members == 0) |
---|
1502 | 1513 | __dev_remove_pack(&f->prot_hook); |
---|
.. | .. |
---|
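fanout_demux_*() runs under rcu_read_lock() from packet_rcv_fanout(), so the f->arr[] slots become RCU pointers: readers use rcu_dereference(), and the writer, which holds f->lock, publishes with rcu_assign_pointer() and inspects its own array with rcu_dereference_protected(). A condensed kernel-style sketch of that discipline (structure and field names simplified from the real code, array sized statically here only for brevity):

```c
#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <net/sock.h>

struct fanout_group {
	spinlock_t		lock;
	unsigned int		num_members;
	struct sock __rcu	*arr[8];	/* real code sizes this at runtime */
};

/* Fast path: caller is inside rcu_read_lock() (packet receive softirq). */
static struct sock *fanout_pick(struct fanout_group *f, unsigned int idx)
{
	return rcu_dereference(f->arr[idx]);
}

/* Slow path: caller holds f->lock, so publication is ordered by
 * rcu_assign_pointer() and readers never see a half-initialized slot. */
static void fanout_add_member(struct fanout_group *f, struct sock *sk)
{
	rcu_assign_pointer(f->arr[f->num_members], sk);
	f->num_members++;
}
```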
1539 | 1550 | } |
---|
1540 | 1551 | } |
---|
1541 | 1552 | |
---|
1542 | | -static int fanout_set_data_cbpf(struct packet_sock *po, char __user *data, |
---|
| 1553 | +static int fanout_set_data_cbpf(struct packet_sock *po, sockptr_t data, |
---|
1543 | 1554 | unsigned int len) |
---|
1544 | 1555 | { |
---|
1545 | 1556 | struct bpf_prog *new; |
---|
.. | .. |
---|
1548 | 1559 | |
---|
1549 | 1560 | if (sock_flag(&po->sk, SOCK_FILTER_LOCKED)) |
---|
1550 | 1561 | return -EPERM; |
---|
1551 | | - if (len != sizeof(fprog)) |
---|
1552 | | - return -EINVAL; |
---|
1553 | | - if (copy_from_user(&fprog, data, len)) |
---|
1554 | | - return -EFAULT; |
---|
| 1562 | + |
---|
| 1563 | + ret = copy_bpf_fprog_from_user(&fprog, data, len); |
---|
| 1564 | + if (ret) |
---|
| 1565 | + return ret; |
---|
1555 | 1566 | |
---|
1556 | 1567 | ret = bpf_prog_create_from_user(&new, &fprog, NULL, false); |
---|
1557 | 1568 | if (ret) |
---|
.. | .. |
---|
1561 | 1572 | return 0; |
---|
1562 | 1573 | } |
---|
1563 | 1574 | |
---|
1564 | | -static int fanout_set_data_ebpf(struct packet_sock *po, char __user *data, |
---|
| 1575 | +static int fanout_set_data_ebpf(struct packet_sock *po, sockptr_t data, |
---|
1565 | 1576 | unsigned int len) |
---|
1566 | 1577 | { |
---|
1567 | 1578 | struct bpf_prog *new; |
---|
.. | .. |
---|
1571 | 1582 | return -EPERM; |
---|
1572 | 1583 | if (len != sizeof(fd)) |
---|
1573 | 1584 | return -EINVAL; |
---|
1574 | | - if (copy_from_user(&fd, data, len)) |
---|
| 1585 | + if (copy_from_sockptr(&fd, data, len)) |
---|
1575 | 1586 | return -EFAULT; |
---|
1576 | 1587 | |
---|
1577 | 1588 | new = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER); |
---|
.. | .. |
---|
1582 | 1593 | return 0; |
---|
1583 | 1594 | } |
---|
1584 | 1595 | |
---|
1585 | | -static int fanout_set_data(struct packet_sock *po, char __user *data, |
---|
| 1596 | +static int fanout_set_data(struct packet_sock *po, sockptr_t data, |
---|
1586 | 1597 | unsigned int len) |
---|
1587 | 1598 | { |
---|
1588 | 1599 | switch (po->fanout->type) { |
---|
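sockptr_t (from <linux/sockptr.h>) wraps either a user or a kernel pointer, which is what lets one setsockopt() path serve both regular syscalls and in-kernel callers without a separate compat variant; copy_from_sockptr() dispatches to copy_from_user() or memcpy() as appropriate. A minimal sketch of the consumer side, modeled on the copy_from_sockptr() calls in this file:

```c
#include <linux/sockptr.h>
#include <linux/errno.h>

/* Copy a fixed-size option value regardless of where the caller's
 * buffer lives; mirrors the pattern used by packet_setsockopt(). */
static int get_int_option(sockptr_t optval, unsigned int optlen, int *val)
{
	if (optlen != sizeof(*val))
		return -EINVAL;
	if (copy_from_sockptr(val, optval, sizeof(*val)))
		return -EFAULT;
	return 0;
}
```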
.. | .. |
---|
1634 | 1645 | return false; |
---|
1635 | 1646 | } |
---|
1636 | 1647 | |
---|
1637 | | -static int fanout_add(struct sock *sk, u16 id, u16 type_flags) |
---|
| 1648 | +static int fanout_add(struct sock *sk, struct fanout_args *args) |
---|
1638 | 1649 | { |
---|
1639 | 1650 | struct packet_rollover *rollover = NULL; |
---|
1640 | 1651 | struct packet_sock *po = pkt_sk(sk); |
---|
| 1652 | + u16 type_flags = args->type_flags; |
---|
1641 | 1653 | struct packet_fanout *f, *match; |
---|
1642 | 1654 | u8 type = type_flags & 0xff; |
---|
1643 | 1655 | u8 flags = type_flags >> 8; |
---|
| 1656 | + u16 id = args->id; |
---|
1644 | 1657 | int err; |
---|
1645 | 1658 | |
---|
1646 | 1659 | switch (type) { |
---|
.. | .. |
---|
1698 | 1711 | } |
---|
1699 | 1712 | } |
---|
1700 | 1713 | err = -EINVAL; |
---|
1701 | | - if (match && match->flags != flags) |
---|
1702 | | - goto out; |
---|
1703 | | - if (!match) { |
---|
| 1714 | + if (match) { |
---|
| 1715 | + if (match->flags != flags) |
---|
| 1716 | + goto out; |
---|
| 1717 | + if (args->max_num_members && |
---|
| 1718 | + args->max_num_members != match->max_num_members) |
---|
| 1719 | + goto out; |
---|
| 1720 | + } else { |
---|
| 1721 | + if (args->max_num_members > PACKET_FANOUT_MAX) |
---|
| 1722 | + goto out; |
---|
| 1723 | + if (!args->max_num_members) |
---|
| 1724 | + /* legacy PACKET_FANOUT_MAX */ |
---|
| 1725 | + args->max_num_members = 256; |
---|
1704 | 1726 | err = -ENOMEM; |
---|
1705 | | - match = kzalloc(sizeof(*match), GFP_KERNEL); |
---|
| 1727 | + match = kvzalloc(struct_size(match, arr, args->max_num_members), |
---|
| 1728 | + GFP_KERNEL); |
---|
1706 | 1729 | if (!match) |
---|
1707 | 1730 | goto out; |
---|
1708 | 1731 | write_pnet(&match->net, sock_net(sk)); |
---|
.. | .. |
---|
1719 | 1742 | match->prot_hook.af_packet_priv = match; |
---|
1720 | 1743 | match->prot_hook.af_packet_net = read_pnet(&match->net); |
---|
1721 | 1744 | match->prot_hook.id_match = match_fanout_group; |
---|
| 1745 | + match->max_num_members = args->max_num_members; |
---|
1722 | 1746 | list_add(&match->list, &fanout_list); |
---|
1723 | 1747 | } |
---|
1724 | 1748 | err = -EINVAL; |
---|
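With struct fanout_args, PACKET_FANOUT accepts either the legacy int (id | flags << 16) or the extended structure carrying max_num_members, which the fanout_add() changes above use when sizing the group. A hedged userspace sketch (group id and size are arbitrary example values; requires CAP_NET_RAW and headers new enough to define struct fanout_args in <linux/if_packet.h>):

```c
#include <linux/if_packet.h>
#include <linux/if_ether.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <stdio.h>

int main(void)
{
	int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	struct fanout_args args = {
		.id              = 42,			/* example group id */
		.type_flags      = PACKET_FANOUT_HASH,
		.max_num_members = 512,			/* above the legacy 256 cap */
	};

	if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args, sizeof(args)))
		perror("PACKET_FANOUT");
	return 0;
}
```

Passing sizeof(int) instead of sizeof(args) keeps the old behavior, since fanout_add() treats a zero max_num_members as the legacy limit of 256.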
.. | .. |
---|
1729 | 1753 | match->prot_hook.type == po->prot_hook.type && |
---|
1730 | 1754 | match->prot_hook.dev == po->prot_hook.dev) { |
---|
1731 | 1755 | err = -ENOSPC; |
---|
1732 | | - if (refcount_read(&match->sk_ref) < PACKET_FANOUT_MAX) { |
---|
| 1756 | + if (refcount_read(&match->sk_ref) < match->max_num_members) { |
---|
1733 | 1757 | __dev_remove_pack(&po->prot_hook); |
---|
1734 | 1758 | |
---|
1735 | 1759 | /* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */ |
---|
.. | .. |
---|
1746 | 1770 | |
---|
1747 | 1771 | if (err && !refcount_read(&match->sk_ref)) { |
---|
1748 | 1772 | list_del(&match->list); |
---|
1749 | | - kfree(match); |
---|
| 1773 | + kvfree(match); |
---|
1750 | 1774 | } |
---|
1751 | 1775 | |
---|
1752 | 1776 | out: |
---|
.. | .. |
---|
1836 | 1860 | skb_dst_drop(skb); |
---|
1837 | 1861 | |
---|
1838 | 1862 | /* drop conntrack reference */ |
---|
1839 | | - nf_reset(skb); |
---|
| 1863 | + nf_reset_ct(skb); |
---|
1840 | 1864 | |
---|
1841 | 1865 | spkt = &PACKET_SKB_CB(skb)->sa.pkt; |
---|
1842 | 1866 | |
---|
.. | .. |
---|
1864 | 1888 | return 0; |
---|
1865 | 1889 | } |
---|
1866 | 1890 | |
---|
| 1891 | +static void packet_parse_headers(struct sk_buff *skb, struct socket *sock) |
---|
| 1892 | +{ |
---|
| 1893 | + int depth; |
---|
| 1894 | + |
---|
| 1895 | + if ((!skb->protocol || skb->protocol == htons(ETH_P_ALL)) && |
---|
| 1896 | + sock->type == SOCK_RAW) { |
---|
| 1897 | + skb_reset_mac_header(skb); |
---|
| 1898 | + skb->protocol = dev_parse_header_protocol(skb); |
---|
| 1899 | + } |
---|
| 1900 | + |
---|
| 1901 | + /* Move network header to the right position for VLAN tagged packets */ |
---|
| 1902 | + if (likely(skb->dev->type == ARPHRD_ETHER) && |
---|
| 1903 | + eth_type_vlan(skb->protocol) && |
---|
| 1904 | + vlan_get_protocol_and_depth(skb, skb->protocol, &depth) != 0) |
---|
| 1905 | + skb_set_network_header(skb, depth); |
---|
| 1906 | + |
---|
| 1907 | + skb_probe_transport_header(skb); |
---|
| 1908 | +} |
---|
1867 | 1909 | |
---|
1868 | 1910 | /* |
---|
1869 | 1911 | * Output a raw packet to a device layer. This bypasses all the other |
---|
.. | .. |
---|
1956 | 1998 | goto retry; |
---|
1957 | 1999 | } |
---|
1958 | 2000 | |
---|
1959 | | - if (!dev_validate_header(dev, skb->data, len)) { |
---|
| 2001 | + if (!dev_validate_header(dev, skb->data, len) || !skb->len) { |
---|
1960 | 2002 | err = -EINVAL; |
---|
1961 | 2003 | goto out_unlock; |
---|
1962 | 2004 | } |
---|
.. | .. |
---|
1979 | 2021 | skb->mark = sk->sk_mark; |
---|
1980 | 2022 | skb->tstamp = sockc.transmit_time; |
---|
1981 | 2023 | |
---|
1982 | | - sock_tx_timestamp(sk, sockc.tsflags, &skb_shinfo(skb)->tx_flags); |
---|
| 2024 | + skb_setup_tx_timestamp(skb, sockc.tsflags); |
---|
1983 | 2025 | |
---|
1984 | 2026 | if (unlikely(extra_len == 4)) |
---|
1985 | 2027 | skb->no_fcs = 1; |
---|
1986 | 2028 | |
---|
1987 | | - skb_probe_transport_header(skb, 0); |
---|
| 2029 | + packet_parse_headers(skb, sock); |
---|
1988 | 2030 | |
---|
1989 | 2031 | dev_queue_xmit(skb); |
---|
1990 | 2032 | rcu_read_unlock(); |
---|
.. | .. |
---|
2061 | 2103 | |
---|
2062 | 2104 | skb->dev = dev; |
---|
2063 | 2105 | |
---|
2064 | | - if (dev->header_ops) { |
---|
| 2106 | + if (dev_has_header(dev)) { |
---|
2065 | 2107 | /* The device has an explicit notion of ll header, |
---|
2066 | 2108 | * exported to higher levels. |
---|
2067 | 2109 | * |
---|
.. | .. |
---|
2106 | 2148 | sll = &PACKET_SKB_CB(skb)->sa.ll; |
---|
2107 | 2149 | sll->sll_hatype = dev->type; |
---|
2108 | 2150 | sll->sll_pkttype = skb->pkt_type; |
---|
2109 | | - if (unlikely(po->origdev)) |
---|
| 2151 | + if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV))) |
---|
2110 | 2152 | sll->sll_ifindex = orig_dev->ifindex; |
---|
2111 | 2153 | else |
---|
2112 | 2154 | sll->sll_ifindex = dev->ifindex; |
---|
.. | .. |
---|
2126 | 2168 | skb_dst_drop(skb); |
---|
2127 | 2169 | |
---|
2128 | 2170 | /* drop conntrack reference */ |
---|
2129 | | - nf_reset(skb); |
---|
| 2171 | + nf_reset_ct(skb); |
---|
2130 | 2172 | |
---|
2131 | 2173 | spin_lock(&sk->sk_receive_queue.lock); |
---|
2132 | 2174 | po->stats.stats1.tp_packets++; |
---|
.. | .. |
---|
2138 | 2180 | |
---|
2139 | 2181 | drop_n_acct: |
---|
2140 | 2182 | is_drop_n_account = true; |
---|
2141 | | - spin_lock(&sk->sk_receive_queue.lock); |
---|
2142 | | - po->stats.stats1.tp_drops++; |
---|
| 2183 | + atomic_inc(&po->tp_drops); |
---|
2143 | 2184 | atomic_inc(&sk->sk_drops); |
---|
2144 | | - spin_unlock(&sk->sk_receive_queue.lock); |
---|
2145 | 2185 | |
---|
2146 | 2186 | drop_n_restore: |
---|
2147 | 2187 | if (skb_head != skb->data && skb_shared(skb)) { |
---|
.. | .. |
---|
2170 | 2210 | unsigned short macoff, hdrlen; |
---|
2171 | 2211 | unsigned int netoff; |
---|
2172 | 2212 | struct sk_buff *copy_skb = NULL; |
---|
2173 | | - struct timespec ts; |
---|
| 2213 | + struct timespec64 ts; |
---|
2174 | 2214 | __u32 ts_status; |
---|
2175 | 2215 | bool is_drop_n_account = false; |
---|
2176 | 2216 | unsigned int slot_id = 0; |
---|
.. | .. |
---|
2192 | 2232 | if (!net_eq(dev_net(dev), sock_net(sk))) |
---|
2193 | 2233 | goto drop; |
---|
2194 | 2234 | |
---|
2195 | | - if (dev->header_ops) { |
---|
| 2235 | + if (dev_has_header(dev)) { |
---|
2196 | 2236 | if (sk->sk_type != SOCK_DGRAM) |
---|
2197 | 2237 | skb_push(skb, skb->data - skb_mac_header(skb)); |
---|
2198 | 2238 | else if (skb->pkt_type == PACKET_OUTGOING) { |
---|
.. | .. |
---|
2207 | 2247 | if (!res) |
---|
2208 | 2248 | goto drop_n_restore; |
---|
2209 | 2249 | |
---|
| 2250 | + /* If we are flooded, just give up */ |
---|
| 2251 | + if (__packet_rcv_has_room(po, skb) == ROOM_NONE) { |
---|
| 2252 | + atomic_inc(&po->tp_drops); |
---|
| 2253 | + goto drop_n_restore; |
---|
| 2254 | + } |
---|
| 2255 | + |
---|
2210 | 2256 | if (skb->ip_summed == CHECKSUM_PARTIAL) |
---|
2211 | 2257 | status |= TP_STATUS_CSUMNOTREADY; |
---|
2212 | 2258 | else if (skb->pkt_type != PACKET_OUTGOING && |
---|
2213 | | - (skb->ip_summed == CHECKSUM_COMPLETE || |
---|
2214 | | - skb_csum_unnecessary(skb))) |
---|
| 2259 | + skb_csum_unnecessary(skb)) |
---|
2215 | 2260 | status |= TP_STATUS_CSUM_VALID; |
---|
2216 | 2261 | |
---|
2217 | 2262 | if (snaplen > res) |
---|
.. | .. |
---|
2232 | 2277 | macoff = netoff - maclen; |
---|
2233 | 2278 | } |
---|
2234 | 2279 | if (netoff > USHRT_MAX) { |
---|
2235 | | - spin_lock(&sk->sk_receive_queue.lock); |
---|
2236 | | - po->stats.stats1.tp_drops++; |
---|
2237 | | - spin_unlock(&sk->sk_receive_queue.lock); |
---|
| 2280 | + atomic_inc(&po->tp_drops); |
---|
2238 | 2281 | goto drop_n_restore; |
---|
2239 | 2282 | } |
---|
2240 | 2283 | if (po->tp_version <= TPACKET_V2) { |
---|
.. | .. |
---|
2247 | 2290 | copy_skb = skb_get(skb); |
---|
2248 | 2291 | skb_head = skb->data; |
---|
2249 | 2292 | } |
---|
2250 | | - if (copy_skb) |
---|
| 2293 | + if (copy_skb) { |
---|
| 2294 | + memset(&PACKET_SKB_CB(copy_skb)->sa.ll, 0, |
---|
| 2295 | + sizeof(PACKET_SKB_CB(copy_skb)->sa.ll)); |
---|
2251 | 2296 | skb_set_owner_r(copy_skb, sk); |
---|
| 2297 | + } |
---|
2252 | 2298 | } |
---|
2253 | 2299 | snaplen = po->rx_ring.frame_size - macoff; |
---|
2254 | 2300 | if ((int)snaplen < 0) { |
---|
.. | .. |
---|
2300 | 2346 | * Anyways, moving it for V1/V2 only as V3 doesn't need this |
---|
2301 | 2347 | * at packet level. |
---|
2302 | 2348 | */ |
---|
2303 | | - if (po->stats.stats1.tp_drops) |
---|
| 2349 | + if (atomic_read(&po->tp_drops)) |
---|
2304 | 2350 | status |= TP_STATUS_LOSING; |
---|
2305 | 2351 | } |
---|
2306 | 2352 | |
---|
.. | .. |
---|
2313 | 2359 | |
---|
2314 | 2360 | skb_copy_bits(skb, 0, h.raw + macoff, snaplen); |
---|
2315 | 2361 | |
---|
2316 | | - if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp))) |
---|
2317 | | - getnstimeofday(&ts); |
---|
| 2362 | + /* Always timestamp; prefer an existing software timestamp taken |
---|
| 2363 | + * closer to the time of capture. |
---|
| 2364 | + */ |
---|
| 2365 | + ts_status = tpacket_get_timestamp(skb, &ts, |
---|
| 2366 | + po->tp_tstamp | SOF_TIMESTAMPING_SOFTWARE); |
---|
| 2367 | + if (!ts_status) |
---|
| 2368 | + ktime_get_real_ts64(&ts); |
---|
2318 | 2369 | |
---|
2319 | 2370 | status |= ts_status; |
---|
2320 | 2371 | |
---|
.. | .. |
---|
2370 | 2421 | sll->sll_hatype = dev->type; |
---|
2371 | 2422 | sll->sll_protocol = skb->protocol; |
---|
2372 | 2423 | sll->sll_pkttype = skb->pkt_type; |
---|
2373 | | - if (unlikely(po->origdev)) |
---|
| 2424 | + if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV))) |
---|
2374 | 2425 | sll->sll_ifindex = orig_dev->ifindex; |
---|
2375 | 2426 | else |
---|
2376 | 2427 | sll->sll_ifindex = dev->ifindex; |
---|
.. | .. |
---|
2413 | 2464 | return 0; |
---|
2414 | 2465 | |
---|
2415 | 2466 | drop_n_account: |
---|
2416 | | - is_drop_n_account = true; |
---|
2417 | | - po->stats.stats1.tp_drops++; |
---|
2418 | 2467 | spin_unlock(&sk->sk_receive_queue.lock); |
---|
| 2468 | + atomic_inc(&po->tp_drops); |
---|
| 2469 | + is_drop_n_account = true; |
---|
2419 | 2470 | |
---|
2420 | 2471 | sk->sk_data_ready(sk); |
---|
2421 | 2472 | kfree_skb(copy_skb); |
---|
.. | .. |
---|
2441 | 2492 | } |
---|
2442 | 2493 | |
---|
2443 | 2494 | sock_wfree(skb); |
---|
2444 | | -} |
---|
2445 | | - |
---|
2446 | | -static void tpacket_set_protocol(const struct net_device *dev, |
---|
2447 | | - struct sk_buff *skb) |
---|
2448 | | -{ |
---|
2449 | | - if (dev->type == ARPHRD_ETHER) { |
---|
2450 | | - skb_reset_mac_header(skb); |
---|
2451 | | - skb->protocol = eth_hdr(skb)->h_proto; |
---|
2452 | | - } |
---|
2453 | 2495 | } |
---|
2454 | 2496 | |
---|
2455 | 2497 | static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len) |
---|
.. | .. |
---|
2499 | 2541 | skb->priority = po->sk.sk_priority; |
---|
2500 | 2542 | skb->mark = po->sk.sk_mark; |
---|
2501 | 2543 | skb->tstamp = sockc->transmit_time; |
---|
2502 | | - sock_tx_timestamp(&po->sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags); |
---|
| 2544 | + skb_setup_tx_timestamp(skb, sockc->tsflags); |
---|
2503 | 2545 | skb_zcopy_set_nouarg(skb, ph.raw); |
---|
2504 | 2546 | |
---|
2505 | 2547 | skb_reserve(skb, hlen); |
---|
.. | .. |
---|
2522 | 2564 | return err; |
---|
2523 | 2565 | if (!dev_validate_header(dev, skb->data, hdrlen)) |
---|
2524 | 2566 | return -EINVAL; |
---|
2525 | | - if (!skb->protocol) |
---|
2526 | | - tpacket_set_protocol(dev, skb); |
---|
2527 | 2567 | |
---|
2528 | 2568 | data += hdrlen; |
---|
2529 | 2569 | to_write -= hdrlen; |
---|
.. | .. |
---|
2558 | 2598 | len = ((to_write > len_max) ? len_max : to_write); |
---|
2559 | 2599 | } |
---|
2560 | 2600 | |
---|
2561 | | - skb_probe_transport_header(skb, 0); |
---|
| 2601 | + packet_parse_headers(skb, sock); |
---|
2562 | 2602 | |
---|
2563 | 2603 | return tp_len; |
---|
2564 | 2604 | } |
---|
.. | .. |
---|
2788 | 2828 | packet_inc_pending(&po->tx_ring); |
---|
2789 | 2829 | |
---|
2790 | 2830 | status = TP_STATUS_SEND_REQUEST; |
---|
2791 | | - err = po->xmit(skb); |
---|
2792 | | - if (unlikely(err > 0)) { |
---|
2793 | | - err = net_xmit_errno(err); |
---|
| 2831 | + /* Paired with WRITE_ONCE() in packet_setsockopt() */ |
---|
| 2832 | + err = READ_ONCE(po->xmit)(skb); |
---|
| 2833 | + if (unlikely(err != 0)) { |
---|
| 2834 | + if (err > 0) |
---|
| 2835 | + err = net_xmit_errno(err); |
---|
2794 | 2836 | if (err && __packet_get_status(po, ph) == |
---|
2795 | 2837 | TP_STATUS_AVAILABLE) { |
---|
2796 | 2838 | /* skb was destructed already */ |
---|
.. | .. |
---|
2957 | 2999 | if (err) |
---|
2958 | 3000 | goto out_free; |
---|
2959 | 3001 | |
---|
2960 | | - if (sock->type == SOCK_RAW && |
---|
2961 | | - !dev_validate_header(dev, skb->data, len)) { |
---|
| 3002 | + if ((sock->type == SOCK_RAW && |
---|
| 3003 | + !dev_validate_header(dev, skb->data, len)) || !skb->len) { |
---|
2962 | 3004 | err = -EINVAL; |
---|
2963 | 3005 | goto out_free; |
---|
2964 | 3006 | } |
---|
2965 | 3007 | |
---|
2966 | | - sock_tx_timestamp(sk, sockc.tsflags, &skb_shinfo(skb)->tx_flags); |
---|
| 3008 | + skb_setup_tx_timestamp(skb, sockc.tsflags); |
---|
2967 | 3009 | |
---|
2968 | 3010 | if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) && |
---|
2969 | 3011 | !packet_extra_vlan_len_allowed(dev, skb)) { |
---|
.. | .. |
---|
2977 | 3019 | skb->mark = sockc.mark; |
---|
2978 | 3020 | skb->tstamp = sockc.transmit_time; |
---|
2979 | 3021 | |
---|
| 3022 | + if (unlikely(extra_len == 4)) |
---|
| 3023 | + skb->no_fcs = 1; |
---|
| 3024 | + |
---|
| 3025 | + packet_parse_headers(skb, sock); |
---|
| 3026 | + |
---|
2980 | 3027 | if (has_vnet_hdr) { |
---|
2981 | 3028 | err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le()); |
---|
2982 | 3029 | if (err) |
---|
.. | .. |
---|
2985 | 3032 | virtio_net_hdr_set_proto(skb, &vnet_hdr); |
---|
2986 | 3033 | } |
---|
2987 | 3034 | |
---|
2988 | | - skb_probe_transport_header(skb, reserve); |
---|
2989 | | - |
---|
2990 | | - if (unlikely(extra_len == 4)) |
---|
2991 | | - skb->no_fcs = 1; |
---|
2992 | | - |
---|
2993 | | - err = po->xmit(skb); |
---|
2994 | | - if (err > 0 && (err = net_xmit_errno(err)) != 0) |
---|
2995 | | - goto out_unlock; |
---|
| 3035 | + /* Paired with WRITE_ONCE() in packet_setsockopt() */ |
---|
| 3036 | + err = READ_ONCE(po->xmit)(skb); |
---|
| 3037 | + if (unlikely(err != 0)) { |
---|
| 3038 | + if (err > 0) |
---|
| 3039 | + err = net_xmit_errno(err); |
---|
| 3040 | + if (err) |
---|
| 3041 | + goto out_unlock; |
---|
| 3042 | + } |
---|
2996 | 3043 | |
---|
2997 | 3044 | dev_put(dev); |
---|
2998 | 3045 | |
---|
.. | .. |
---|
3012 | 3059 | struct sock *sk = sock->sk; |
---|
3013 | 3060 | struct packet_sock *po = pkt_sk(sk); |
---|
3014 | 3061 | |
---|
3015 | | - if (po->tx_ring.pg_vec) |
---|
| 3062 | + /* Reading tx_ring.pg_vec without holding pg_vec_lock is racy. |
---|
| 3063 | + * tpacket_snd() will redo the check safely. |
---|
| 3064 | + */ |
---|
| 3065 | + if (data_race(po->tx_ring.pg_vec)) |
---|
3016 | 3066 | return tpacket_snd(po, msg); |
---|
3017 | | - else |
---|
3018 | | - return packet_snd(sock, msg, len); |
---|
| 3067 | + |
---|
| 3068 | + return packet_snd(sock, msg, len); |
---|
3019 | 3069 | } |
---|
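data_race() (from <linux/compiler.h>) adds no ordering or atomicity; it only tells KCSAN that this particular lockless read is intentional, because packet_setsockopt() can install the TX ring concurrently and tpacket_snd() re-checks pg_vec under pg_vec_lock. A minimal in-tree-style sketch of the annotation (the ring-buffer struct is the one from net/packet/internal.h, shown here only to illustrate the pattern):

```c
#include <linux/compiler.h>
#include <linux/types.h>
#include "internal.h"

static bool tx_ring_probably_mapped(const struct packet_ring_buffer *rb)
{
	/* A stale answer is acceptable: the slow path revalidates under
	 * the lock, so the race is annotated rather than "fixed". */
	return data_race(rb->pg_vec) != NULL;
}
```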
3020 | 3070 | |
---|
3021 | 3071 | /* |
---|
.. | .. |
---|
3076 | 3126 | kfree(po->rollover); |
---|
3077 | 3127 | if (f) { |
---|
3078 | 3128 | fanout_release_data(f); |
---|
3079 | | - kfree(f); |
---|
| 3129 | + kvfree(f); |
---|
3080 | 3130 | } |
---|
3081 | 3131 | /* |
---|
3082 | 3132 | * Now the socket is dead. No more input will appear. |
---|
.. | .. |
---|
3111 | 3161 | |
---|
3112 | 3162 | lock_sock(sk); |
---|
3113 | 3163 | spin_lock(&po->bind_lock); |
---|
| 3164 | + if (!proto) |
---|
| 3165 | + proto = po->num; |
---|
| 3166 | + |
---|
3114 | 3167 | rcu_read_lock(); |
---|
3115 | 3168 | |
---|
3116 | 3169 | if (po->fanout) { |
---|
.. | .. |
---|
3213 | 3266 | memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data)); |
---|
3214 | 3267 | name[sizeof(uaddr->sa_data)] = 0; |
---|
3215 | 3268 | |
---|
3216 | | - return packet_do_bind(sk, name, 0, pkt_sk(sk)->num); |
---|
| 3269 | + return packet_do_bind(sk, name, 0, 0); |
---|
3217 | 3270 | } |
---|
3218 | 3271 | |
---|
3219 | 3272 | static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) |
---|
.. | .. |
---|
3230 | 3283 | if (sll->sll_family != AF_PACKET) |
---|
3231 | 3284 | return -EINVAL; |
---|
3232 | 3285 | |
---|
3233 | | - return packet_do_bind(sk, NULL, sll->sll_ifindex, |
---|
3234 | | - sll->sll_protocol ? : pkt_sk(sk)->num); |
---|
| 3286 | + return packet_do_bind(sk, NULL, sll->sll_ifindex, sll->sll_protocol); |
---|
3235 | 3287 | } |
---|
3236 | 3288 | |
---|
3237 | 3289 | static struct proto packet_proto = { |
---|
.. | .. |
---|
3371 | 3423 | if (skb == NULL) |
---|
3372 | 3424 | goto out; |
---|
3373 | 3425 | |
---|
3374 | | - if (pkt_sk(sk)->pressure) |
---|
3375 | | - packet_rcv_has_room(pkt_sk(sk), NULL); |
---|
| 3426 | + packet_rcv_try_clear_pressure(pkt_sk(sk)); |
---|
3376 | 3427 | |
---|
3377 | 3428 | if (pkt_sk(sk)->has_vnet_hdr) { |
---|
3378 | 3429 | err = packet_rcv_vnet(msg, skb, &len); |
---|
.. | .. |
---|
3407 | 3458 | sock_recv_ts_and_drops(msg, sk, skb); |
---|
3408 | 3459 | |
---|
3409 | 3460 | if (msg->msg_name) { |
---|
| 3461 | + const size_t max_len = min(sizeof(skb->cb), |
---|
| 3462 | + sizeof(struct sockaddr_storage)); |
---|
3410 | 3463 | int copy_len; |
---|
3411 | 3464 | |
---|
3412 | 3465 | /* If the address length field is there to be filled |
---|
.. | .. |
---|
3429 | 3482 | msg->msg_namelen = sizeof(struct sockaddr_ll); |
---|
3430 | 3483 | } |
---|
3431 | 3484 | } |
---|
| 3485 | + if (WARN_ON_ONCE(copy_len > max_len)) { |
---|
| 3486 | + copy_len = max_len; |
---|
| 3487 | + msg->msg_namelen = copy_len; |
---|
| 3488 | + } |
---|
3432 | 3489 | memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len); |
---|
3433 | 3490 | } |
---|
3434 | 3491 | |
---|
3435 | | - if (pkt_sk(sk)->auxdata) { |
---|
| 3492 | + if (packet_sock_flag(pkt_sk(sk), PACKET_SOCK_AUXDATA)) { |
---|
3436 | 3493 | struct tpacket_auxdata aux; |
---|
3437 | 3494 | |
---|
3438 | 3495 | aux.tp_status = TP_STATUS_USER; |
---|
3439 | 3496 | if (skb->ip_summed == CHECKSUM_PARTIAL) |
---|
3440 | 3497 | aux.tp_status |= TP_STATUS_CSUMNOTREADY; |
---|
3441 | 3498 | else if (skb->pkt_type != PACKET_OUTGOING && |
---|
3442 | | - (skb->ip_summed == CHECKSUM_COMPLETE || |
---|
3443 | | - skb_csum_unnecessary(skb))) |
---|
| 3499 | + skb_csum_unnecessary(skb)) |
---|
3444 | 3500 | aux.tp_status |= TP_STATUS_CSUM_VALID; |
---|
3445 | 3501 | |
---|
3446 | 3502 | aux.tp_len = origlen; |
---|
.. | .. |
---|
3670 | 3726 | } |
---|
3671 | 3727 | |
---|
3672 | 3728 | static int |
---|
3673 | | -packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) |
---|
| 3729 | +packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, |
---|
| 3730 | + unsigned int optlen) |
---|
3674 | 3731 | { |
---|
3675 | 3732 | struct sock *sk = sock->sk; |
---|
3676 | 3733 | struct packet_sock *po = pkt_sk(sk); |
---|
.. | .. |
---|
3690 | 3747 | return -EINVAL; |
---|
3691 | 3748 | if (len > sizeof(mreq)) |
---|
3692 | 3749 | len = sizeof(mreq); |
---|
3693 | | - if (copy_from_user(&mreq, optval, len)) |
---|
| 3750 | + if (copy_from_sockptr(&mreq, optval, len)) |
---|
3694 | 3751 | return -EFAULT; |
---|
3695 | 3752 | if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address))) |
---|
3696 | 3753 | return -EINVAL; |
---|
.. | .. |
---|
3721 | 3778 | if (optlen < len) { |
---|
3722 | 3779 | ret = -EINVAL; |
---|
3723 | 3780 | } else { |
---|
3724 | | - if (copy_from_user(&req_u.req, optval, len)) |
---|
| 3781 | + if (copy_from_sockptr(&req_u.req, optval, len)) |
---|
3725 | 3782 | ret = -EFAULT; |
---|
3726 | 3783 | else |
---|
3727 | 3784 | ret = packet_set_ring(sk, &req_u, 0, |
---|
.. | .. |
---|
3736 | 3793 | |
---|
3737 | 3794 | if (optlen != sizeof(val)) |
---|
3738 | 3795 | return -EINVAL; |
---|
3739 | | - if (copy_from_user(&val, optval, sizeof(val))) |
---|
| 3796 | + if (copy_from_sockptr(&val, optval, sizeof(val))) |
---|
3740 | 3797 | return -EFAULT; |
---|
3741 | 3798 | |
---|
3742 | 3799 | pkt_sk(sk)->copy_thresh = val; |
---|
.. | .. |
---|
3748 | 3805 | |
---|
3749 | 3806 | if (optlen != sizeof(val)) |
---|
3750 | 3807 | return -EINVAL; |
---|
3751 | | - if (copy_from_user(&val, optval, sizeof(val))) |
---|
| 3808 | + if (copy_from_sockptr(&val, optval, sizeof(val))) |
---|
3752 | 3809 | return -EFAULT; |
---|
3753 | 3810 | switch (val) { |
---|
3754 | 3811 | case TPACKET_V1: |
---|
.. | .. |
---|
3774 | 3831 | |
---|
3775 | 3832 | if (optlen != sizeof(val)) |
---|
3776 | 3833 | return -EINVAL; |
---|
3777 | | - if (copy_from_user(&val, optval, sizeof(val))) |
---|
| 3834 | + if (copy_from_sockptr(&val, optval, sizeof(val))) |
---|
3778 | 3835 | return -EFAULT; |
---|
3779 | 3836 | if (val > INT_MAX) |
---|
3780 | 3837 | return -EINVAL; |
---|
.. | .. |
---|
3794 | 3851 | |
---|
3795 | 3852 | if (optlen != sizeof(val)) |
---|
3796 | 3853 | return -EINVAL; |
---|
3797 | | - if (copy_from_user(&val, optval, sizeof(val))) |
---|
| 3854 | + if (copy_from_sockptr(&val, optval, sizeof(val))) |
---|
3798 | 3855 | return -EFAULT; |
---|
3799 | 3856 | |
---|
3800 | 3857 | lock_sock(sk); |
---|
.. | .. |
---|
3813 | 3870 | |
---|
3814 | 3871 | if (optlen < sizeof(val)) |
---|
3815 | 3872 | return -EINVAL; |
---|
3816 | | - if (copy_from_user(&val, optval, sizeof(val))) |
---|
| 3873 | + if (copy_from_sockptr(&val, optval, sizeof(val))) |
---|
3817 | 3874 | return -EFAULT; |
---|
3818 | 3875 | |
---|
3819 | | - lock_sock(sk); |
---|
3820 | | - po->auxdata = !!val; |
---|
3821 | | - release_sock(sk); |
---|
| 3876 | + packet_sock_flag_set(po, PACKET_SOCK_AUXDATA, val); |
---|
3822 | 3877 | return 0; |
---|
3823 | 3878 | } |
---|
3824 | 3879 | case PACKET_ORIGDEV: |
---|
.. | .. |
---|
3827 | 3882 | |
---|
3828 | 3883 | if (optlen < sizeof(val)) |
---|
3829 | 3884 | return -EINVAL; |
---|
3830 | | - if (copy_from_user(&val, optval, sizeof(val))) |
---|
| 3885 | + if (copy_from_sockptr(&val, optval, sizeof(val))) |
---|
3831 | 3886 | return -EFAULT; |
---|
3832 | 3887 | |
---|
3833 | | - lock_sock(sk); |
---|
3834 | | - po->origdev = !!val; |
---|
3835 | | - release_sock(sk); |
---|
| 3888 | + packet_sock_flag_set(po, PACKET_SOCK_ORIGDEV, val); |
---|
3836 | 3889 | return 0; |
---|
3837 | 3890 | } |
---|
3838 | 3891 | case PACKET_VNET_HDR: |
---|
.. | .. |
---|
3843 | 3896 | return -EINVAL; |
---|
3844 | 3897 | if (optlen < sizeof(val)) |
---|
3845 | 3898 | return -EINVAL; |
---|
3846 | | - if (copy_from_user(&val, optval, sizeof(val))) |
---|
| 3899 | + if (copy_from_sockptr(&val, optval, sizeof(val))) |
---|
3847 | 3900 | return -EFAULT; |
---|
3848 | 3901 | |
---|
3849 | 3902 | lock_sock(sk); |
---|
.. | .. |
---|
3862 | 3915 | |
---|
3863 | 3916 | if (optlen != sizeof(val)) |
---|
3864 | 3917 | return -EINVAL; |
---|
3865 | | - if (copy_from_user(&val, optval, sizeof(val))) |
---|
| 3918 | + if (copy_from_sockptr(&val, optval, sizeof(val))) |
---|
3866 | 3919 | return -EFAULT; |
---|
3867 | 3920 | |
---|
3868 | 3921 | po->tp_tstamp = val; |
---|
.. | .. |
---|
3870 | 3923 | } |
---|
3871 | 3924 | case PACKET_FANOUT: |
---|
3872 | 3925 | { |
---|
3873 | | - int val; |
---|
| 3926 | + struct fanout_args args = { 0 }; |
---|
3874 | 3927 | |
---|
3875 | | - if (optlen != sizeof(val)) |
---|
| 3928 | + if (optlen != sizeof(int) && optlen != sizeof(args)) |
---|
3876 | 3929 | return -EINVAL; |
---|
3877 | | - if (copy_from_user(&val, optval, sizeof(val))) |
---|
| 3930 | + if (copy_from_sockptr(&args, optval, optlen)) |
---|
3878 | 3931 | return -EFAULT; |
---|
3879 | 3932 | |
---|
3880 | | - return fanout_add(sk, val & 0xffff, val >> 16); |
---|
| 3933 | + return fanout_add(sk, &args); |
---|
3881 | 3934 | } |
---|
3882 | 3935 | case PACKET_FANOUT_DATA: |
---|
3883 | 3936 | { |
---|
.. | .. |
---|
3887 | 3940 | |
---|
3888 | 3941 | return fanout_set_data(po, optval, optlen); |
---|
3889 | 3942 | } |
---|
| 3943 | + case PACKET_IGNORE_OUTGOING: |
---|
| 3944 | + { |
---|
| 3945 | + int val; |
---|
| 3946 | + |
---|
| 3947 | + if (optlen != sizeof(val)) |
---|
| 3948 | + return -EINVAL; |
---|
| 3949 | + if (copy_from_sockptr(&val, optval, sizeof(val))) |
---|
| 3950 | + return -EFAULT; |
---|
| 3951 | + if (val < 0 || val > 1) |
---|
| 3952 | + return -EINVAL; |
---|
| 3953 | + |
---|
| 3954 | + po->prot_hook.ignore_outgoing = !!val; |
---|
| 3955 | + return 0; |
---|
| 3956 | + } |
---|
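The PACKET_IGNORE_OUTGOING case added above sets prot_hook.ignore_outgoing, so the socket only sees received frames and not the host's own transmissions. From userspace it is a plain boolean option; a short sketch, assuming fd is an AF_PACKET socket:

```c
#include <linux/if_packet.h>
#include <sys/socket.h>

/* Stop delivering PACKET_OUTGOING frames to this socket. */
static int ignore_outgoing(int fd)
{
	int one = 1;

	return setsockopt(fd, SOL_PACKET, PACKET_IGNORE_OUTGOING,
			  &one, sizeof(one));
}
```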
3890 | 3957 | case PACKET_TX_HAS_OFF: |
---|
3891 | 3958 | { |
---|
3892 | 3959 | unsigned int val; |
---|
3893 | 3960 | |
---|
3894 | 3961 | if (optlen != sizeof(val)) |
---|
3895 | 3962 | return -EINVAL; |
---|
3896 | | - if (copy_from_user(&val, optval, sizeof(val))) |
---|
| 3963 | + if (copy_from_sockptr(&val, optval, sizeof(val))) |
---|
3897 | 3964 | return -EFAULT; |
---|
3898 | 3965 | |
---|
3899 | 3966 | lock_sock(sk); |
---|
.. | .. |
---|
3912 | 3979 | |
---|
3913 | 3980 | if (optlen != sizeof(val)) |
---|
3914 | 3981 | return -EINVAL; |
---|
3915 | | - if (copy_from_user(&val, optval, sizeof(val))) |
---|
| 3982 | + if (copy_from_sockptr(&val, optval, sizeof(val))) |
---|
3916 | 3983 | return -EFAULT; |
---|
3917 | 3984 | |
---|
3918 | | - po->xmit = val ? packet_direct_xmit : dev_queue_xmit; |
---|
| 3985 | + /* Paired with all lockless reads of po->xmit */ |
---|
| 3986 | + WRITE_ONCE(po->xmit, val ? packet_direct_xmit : dev_queue_xmit); |
---|
3919 | 3987 | return 0; |
---|
3920 | 3988 | } |
---|
3921 | 3989 | default: |
---|
.. | .. |
---|
3933 | 4001 | void *data = &val; |
---|
3934 | 4002 | union tpacket_stats_u st; |
---|
3935 | 4003 | struct tpacket_rollover_stats rstats; |
---|
| 4004 | + int drops; |
---|
3936 | 4005 | |
---|
3937 | 4006 | if (level != SOL_PACKET) |
---|
3938 | 4007 | return -ENOPROTOOPT; |
---|
.. | .. |
---|
3949 | 4018 | memcpy(&st, &po->stats, sizeof(st)); |
---|
3950 | 4019 | memset(&po->stats, 0, sizeof(po->stats)); |
---|
3951 | 4020 | spin_unlock_bh(&sk->sk_receive_queue.lock); |
---|
| 4021 | + drops = atomic_xchg(&po->tp_drops, 0); |
---|
3952 | 4022 | |
---|
3953 | 4023 | if (po->tp_version == TPACKET_V3) { |
---|
3954 | 4024 | lv = sizeof(struct tpacket_stats_v3); |
---|
3955 | | - st.stats3.tp_packets += st.stats3.tp_drops; |
---|
| 4025 | + st.stats3.tp_drops = drops; |
---|
| 4026 | + st.stats3.tp_packets += drops; |
---|
3956 | 4027 | data = &st.stats3; |
---|
3957 | 4028 | } else { |
---|
3958 | 4029 | lv = sizeof(struct tpacket_stats); |
---|
3959 | | - st.stats1.tp_packets += st.stats1.tp_drops; |
---|
| 4030 | + st.stats1.tp_drops = drops; |
---|
| 4031 | + st.stats1.tp_packets += drops; |
---|
3960 | 4032 | data = &st.stats1; |
---|
3961 | 4033 | } |
---|
3962 | 4034 | |
---|
3963 | 4035 | break; |
---|
3964 | 4036 | case PACKET_AUXDATA: |
---|
3965 | | - val = po->auxdata; |
---|
| 4037 | + val = packet_sock_flag(po, PACKET_SOCK_AUXDATA); |
---|
3966 | 4038 | break; |
---|
3967 | 4039 | case PACKET_ORIGDEV: |
---|
3968 | | - val = po->origdev; |
---|
| 4040 | + val = packet_sock_flag(po, PACKET_SOCK_ORIGDEV); |
---|
3969 | 4041 | break; |
---|
3970 | 4042 | case PACKET_VNET_HDR: |
---|
3971 | 4043 | val = po->has_vnet_hdr; |
---|
.. | .. |
---|
4010 | 4082 | ((u32)po->fanout->flags << 24)) : |
---|
4011 | 4083 | 0); |
---|
4012 | 4084 | break; |
---|
| 4085 | + case PACKET_IGNORE_OUTGOING: |
---|
| 4086 | + val = po->prot_hook.ignore_outgoing; |
---|
| 4087 | + break; |
---|
4013 | 4088 | case PACKET_ROLLOVER_STATS: |
---|
4014 | 4089 | if (!po->rollover) |
---|
4015 | 4090 | return -EINVAL; |
---|
.. | .. |
---|
4038 | 4113 | return 0; |
---|
4039 | 4114 | } |
---|
4040 | 4115 | |
---|
4041 | | - |
---|
4042 | | -#ifdef CONFIG_COMPAT |
---|
4043 | | -static int compat_packet_setsockopt(struct socket *sock, int level, int optname, |
---|
4044 | | - char __user *optval, unsigned int optlen) |
---|
4045 | | -{ |
---|
4046 | | - struct packet_sock *po = pkt_sk(sock->sk); |
---|
4047 | | - |
---|
4048 | | - if (level != SOL_PACKET) |
---|
4049 | | - return -ENOPROTOOPT; |
---|
4050 | | - |
---|
4051 | | - if (optname == PACKET_FANOUT_DATA && |
---|
4052 | | - po->fanout && po->fanout->type == PACKET_FANOUT_CBPF) { |
---|
4053 | | - optval = (char __user *)get_compat_bpf_fprog(optval); |
---|
4054 | | - if (!optval) |
---|
4055 | | - return -EFAULT; |
---|
4056 | | - optlen = sizeof(struct sock_fprog); |
---|
4057 | | - } |
---|
4058 | | - |
---|
4059 | | - return packet_setsockopt(sock, level, optname, optval, optlen); |
---|
4060 | | -} |
---|
4061 | | -#endif |
---|
4062 | | - |
---|
4063 | 4116 | static int packet_notifier(struct notifier_block *this, |
---|
4064 | 4117 | unsigned long msg, void *ptr) |
---|
4065 | 4118 | { |
---|
.. | .. |
---|
4075 | 4128 | case NETDEV_UNREGISTER: |
---|
4076 | 4129 | if (po->mclist) |
---|
4077 | 4130 | packet_dev_mclist_delete(dev, &po->mclist); |
---|
4078 | | - /* fallthrough */ |
---|
| 4131 | + fallthrough; |
---|
4079 | 4132 | |
---|
4080 | 4133 | case NETDEV_DOWN: |
---|
4081 | 4134 | if (dev->ifindex == po->ifindex) { |
---|
.. | .. |
---|
4135 | 4188 | spin_unlock_bh(&sk->sk_receive_queue.lock); |
---|
4136 | 4189 | return put_user(amount, (int __user *)arg); |
---|
4137 | 4190 | } |
---|
4138 | | - case SIOCGSTAMP: |
---|
4139 | | - return sock_get_timestamp(sk, (struct timeval __user *)arg); |
---|
4140 | | - case SIOCGSTAMPNS: |
---|
4141 | | - return sock_get_timestampns(sk, (struct timespec __user *)arg); |
---|
4142 | | - |
---|
4143 | 4191 | #ifdef CONFIG_INET |
---|
4144 | 4192 | case SIOCADDRT: |
---|
4145 | 4193 | case SIOCDELRT: |
---|
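The SIOCGSTAMP/SIOCGSTAMPNS cases drop out of packet_ioctl() because the generic .gettstamp = sock_gettstamp hook (added to both proto_ops tables below) now services those ioctls; new code is expected to enable SO_TIMESTAMP* and read the timestamp from a control message instead. A hedged sketch of the recvmsg() side:

```c
#include <sys/socket.h>
#include <stdio.h>
#include <string.h>
#include <time.h>

/* Enable SO_TIMESTAMPNS, then pull the timestamp out of the cmsg attached
 * to each received packet instead of calling ioctl(SIOCGSTAMP). */
static void print_rx_timestamp(int fd)
{
	char data[2048], ctrl[512];
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = ctrl, .msg_controllen = sizeof(ctrl),
	};
	int on = 1;

	setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS, &on, sizeof(on));
	if (recvmsg(fd, &msg, 0) < 0)
		return;

	for (struct cmsghdr *c = CMSG_FIRSTHDR(&msg); c; c = CMSG_NXTHDR(&msg, c)) {
		if (c->cmsg_level == SOL_SOCKET && c->cmsg_type == SCM_TIMESTAMPNS) {
			struct timespec ts;

			memcpy(&ts, CMSG_DATA(c), sizeof(ts));
			printf("rx at %lld.%09ld\n",
			       (long long)ts.tv_sec, ts.tv_nsec);
		}
	}
}
```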
.. | .. |
---|
4177 | 4225 | TP_STATUS_KERNEL)) |
---|
4178 | 4226 | mask |= EPOLLIN | EPOLLRDNORM; |
---|
4179 | 4227 | } |
---|
4180 | | - if (po->pressure && __packet_rcv_has_room(po, NULL) == ROOM_NORMAL) |
---|
4181 | | - po->pressure = 0; |
---|
| 4228 | + packet_rcv_try_clear_pressure(po); |
---|
4182 | 4229 | spin_unlock_bh(&sk->sk_receive_queue.lock); |
---|
4183 | 4230 | spin_lock_bh(&sk->sk_write_queue.lock); |
---|
4184 | 4231 | if (po->tx_ring.pg_vec) { |
---|
.. | .. |
---|
4297 | 4344 | struct packet_ring_buffer *rb; |
---|
4298 | 4345 | struct sk_buff_head *rb_queue; |
---|
4299 | 4346 | __be16 num; |
---|
4300 | | - int err = -EINVAL; |
---|
| 4347 | + int err; |
---|
4301 | 4348 | /* Added to avoid minimal code churn */ |
---|
4302 | 4349 | struct tpacket_req *req = &req_u->req; |
---|
4303 | 4350 | |
---|
.. | .. |
---|
4527 | 4574 | .getname = packet_getname_spkt, |
---|
4528 | 4575 | .poll = datagram_poll, |
---|
4529 | 4576 | .ioctl = packet_ioctl, |
---|
| 4577 | + .gettstamp = sock_gettstamp, |
---|
4530 | 4578 | .listen = sock_no_listen, |
---|
4531 | 4579 | .shutdown = sock_no_shutdown, |
---|
4532 | | - .setsockopt = sock_no_setsockopt, |
---|
4533 | | - .getsockopt = sock_no_getsockopt, |
---|
4534 | 4580 | .sendmsg = packet_sendmsg_spkt, |
---|
4535 | 4581 | .recvmsg = packet_recvmsg, |
---|
4536 | 4582 | .mmap = sock_no_mmap, |
---|
.. | .. |
---|
4548 | 4594 | .getname = packet_getname, |
---|
4549 | 4595 | .poll = packet_poll, |
---|
4550 | 4596 | .ioctl = packet_ioctl, |
---|
| 4597 | + .gettstamp = sock_gettstamp, |
---|
4551 | 4598 | .listen = sock_no_listen, |
---|
4552 | 4599 | .shutdown = sock_no_shutdown, |
---|
4553 | 4600 | .setsockopt = packet_setsockopt, |
---|
4554 | 4601 | .getsockopt = packet_getsockopt, |
---|
4555 | | -#ifdef CONFIG_COMPAT |
---|
4556 | | - .compat_setsockopt = compat_packet_setsockopt, |
---|
4557 | | -#endif |
---|
4558 | 4602 | .sendmsg = packet_sendmsg, |
---|
4559 | 4603 | .recvmsg = packet_recvmsg, |
---|
4560 | 4604 | .mmap = packet_mmap, |
---|
.. | .. |
---|
4631 | 4675 | mutex_init(&net->packet.sklist_lock); |
---|
4632 | 4676 | INIT_HLIST_HEAD(&net->packet.sklist); |
---|
4633 | 4677 | |
---|
| 4678 | +#ifdef CONFIG_PROC_FS |
---|
4634 | 4679 | if (!proc_create_net("packet", 0, net->proc_net, &packet_seq_ops, |
---|
4635 | 4680 | sizeof(struct seq_net_private))) |
---|
4636 | 4681 | return -ENOMEM; |
---|
| 4682 | +#endif /* CONFIG_PROC_FS */ |
---|
4637 | 4683 | |
---|
4638 | 4684 | return 0; |
---|
4639 | 4685 | } |
---|