| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-or-later |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * INET An implementation of the TCP/IP protocol suite for the LINUX |
|---|
| 3 | 4 | * operating system. INET is implemented using the BSD Socket |
|---|
| .. | .. |
|---|
| 43 | 44 | * Chetan Loke : Implemented TPACKET_V3 block abstraction |
|---|
| 44 | 45 | * layer. |
|---|
| 45 | 46 | * Copyright (C) 2011, <lokec@ccs.neu.edu> |
|---|
| 46 | | - * |
|---|
| 47 | | - * |
|---|
| 48 | | - * This program is free software; you can redistribute it and/or |
|---|
| 49 | | - * modify it under the terms of the GNU General Public License |
|---|
| 50 | | - * as published by the Free Software Foundation; either version |
|---|
| 51 | | - * 2 of the License, or (at your option) any later version. |
|---|
| 52 | | - * |
|---|
| 53 | 47 | */ |
|---|
| 54 | 48 | |
|---|
| 55 | 49 | #include <linux/types.h> |
|---|
| .. | .. |
|---|
| 63 | 57 | #include <linux/if_packet.h> |
|---|
| 64 | 58 | #include <linux/wireless.h> |
|---|
| 65 | 59 | #include <linux/kernel.h> |
|---|
| 66 | | -#include <linux/delay.h> |
|---|
| 67 | 60 | #include <linux/kmod.h> |
|---|
| 68 | 61 | #include <linux/slab.h> |
|---|
| 69 | 62 | #include <linux/vmalloc.h> |
|---|
| .. | .. |
|---|
| 100 | 93 | |
|---|
| 101 | 94 | /* |
|---|
| 102 | 95 | Assumptions: |
|---|
| 103 | | - - if device has no dev->hard_header routine, it adds and removes ll header |
|---|
| 104 | | - inside itself. In this case ll header is invisible outside of device, |
|---|
| 105 | | - but higher levels still should reserve dev->hard_header_len. |
|---|
| 106 | | - Some devices are enough clever to reallocate skb, when header |
|---|
| 107 | | - will not fit to reserved space (tunnel), another ones are silly |
|---|
| 108 | | - (PPP). |
|---|
| 96 | + - If the device has no dev->header_ops->create, there is no LL header |
|---|
| 97 | + visible above the device. In this case, its hard_header_len should be 0. |
|---|
| 98 | + The device may prepend its own header internally. In this case, its |
|---|
| 99 | + needed_headroom should be set to the space needed for it to add its |
|---|
| 100 | + internal header. |
|---|
| 101 | + For example, a WiFi driver pretending to be an Ethernet driver should |
|---|
| 102 | + set its hard_header_len to be the Ethernet header length, and set its |
|---|
| 103 | + needed_headroom to be (the real WiFi header length - the fake Ethernet |
|---|
| 104 | + header length). |
|---|
| 109 | 105 | - packet socket receives packets with pulled ll header, |
|---|
| 110 | 106 | so that SOCK_RAW should push it back. |
|---|
| 111 | 107 | |
|---|
| 112 | 108 | On receive: |
|---|
| 113 | 109 | ----------- |
|---|
| 114 | 110 | |
|---|
| 115 | | -Incoming, dev->hard_header!=NULL |
|---|
| 111 | +Incoming, dev_has_header(dev) == true |
|---|
| 116 | 112 | mac_header -> ll header |
|---|
| 117 | 113 | data -> data |
|---|
| 118 | 114 | |
|---|
| 119 | | -Outgoing, dev->hard_header!=NULL |
|---|
| 115 | +Outgoing, dev_has_header(dev) == true |
|---|
| 120 | 116 | mac_header -> ll header |
|---|
| 121 | 117 | data -> ll header |
|---|
| 122 | 118 | |
|---|
| 123 | | -Incoming, dev->hard_header==NULL |
|---|
| 124 | | - mac_header -> UNKNOWN position. It is very likely, that it points to ll |
|---|
| 125 | | - header. PPP makes it, that is wrong, because introduce |
|---|
| 126 | | - assymetry between rx and tx paths. |
|---|
| 119 | +Incoming, dev_has_header(dev) == false |
|---|
| 120 | + mac_header -> data |
|---|
| 121 | + However drivers often make it point to the ll header. |
|---|
| 122 | + This is incorrect because the ll header should be invisible to us. |
|---|
| 127 | 123 | data -> data |
|---|
| 128 | 124 | |
|---|
| 129 | | -Outgoing, dev->hard_header==NULL |
|---|
| 130 | | - mac_header -> data. ll header is still not built! |
|---|
| 125 | +Outgoing, dev_has_header(dev) == false |
|---|
| 126 | + mac_header -> data. ll header is invisible to us. |
|---|
| 131 | 127 | data -> data |
|---|
| 132 | 128 | |
|---|
| 133 | 129 | Resume |
|---|
| 134 | | - If dev->hard_header==NULL we are unlikely to restore sensible ll header. |
|---|
| 130 | + If dev_has_header(dev) == false we are unable to restore the ll header, |
|---|
| 131 | + because it is invisible to us. |
|---|
| 135 | 132 | |
|---|
| 136 | 133 | |
|---|
| 137 | 134 | On transmit: |
|---|
| 138 | 135 | ------------ |
|---|
| 139 | 136 | |
|---|
| 140 | | -dev->hard_header != NULL |
|---|
| 137 | +dev->header_ops != NULL |
|---|
| 141 | 138 | mac_header -> ll header |
|---|
| 142 | 139 | data -> ll header |
|---|
| 143 | 140 | |
|---|
| 144 | | -dev->hard_header == NULL (ll header is added by device, we cannot control it) |
|---|
| 141 | +dev->header_ops == NULL (ll header is invisible to us) |
|---|
| 145 | 142 | mac_header -> data |
|---|
| 146 | 143 | data -> data |
|---|
| 147 | 144 | |
|---|
| 148 | | - We should set nh.raw on output to correct posistion, |
|---|
| 145 | + We should set network_header on output to the correct position, |
|---|
| 149 | 146 | packet classifier depends on it. |
|---|
| 150 | 147 | */ |
|---|
| 151 | 148 | |
|---|
| .. | .. |
|---|
| 184 | 181 | #define BLOCK_LEN(x) ((x)->hdr.bh1.blk_len) |
|---|
| 185 | 182 | #define BLOCK_SNUM(x) ((x)->hdr.bh1.seq_num) |
|---|
| 186 | 183 | #define BLOCK_O2PRIV(x) ((x)->offset_to_priv) |
|---|
| 187 | | -#define BLOCK_PRIV(x) ((void *)((char *)(x) + BLOCK_O2PRIV(x))) |
|---|
| 188 | 184 | |
|---|
| 189 | 185 | struct packet_sock; |
|---|
| 190 | 186 | static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, |
|---|
| .. | .. |
|---|
| 273 | 269 | |
|---|
| 274 | 270 | static bool packet_use_direct_xmit(const struct packet_sock *po) |
|---|
| 275 | 271 | { |
|---|
| 276 | | - return po->xmit == packet_direct_xmit; |
|---|
| 277 | | -} |
|---|
| 278 | | - |
|---|
| 279 | | -static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb, |
|---|
| 280 | | - struct net_device *sb_dev) |
|---|
| 281 | | -{ |
|---|
| 282 | | - return dev_pick_tx_cpu_id(dev, skb, sb_dev, NULL); |
|---|
| 272 | + /* Paired with WRITE_ONCE() in packet_setsockopt() */ |
|---|
| 273 | + return READ_ONCE(po->xmit) == packet_direct_xmit; |
|---|
| 283 | 274 | } |
|---|
| 284 | 275 | |
|---|
| 285 | 276 | static u16 packet_pick_tx_queue(struct sk_buff *skb) |
|---|
| 286 | 277 | { |
|---|
| 287 | 278 | struct net_device *dev = skb->dev; |
|---|
| 288 | 279 | const struct net_device_ops *ops = dev->netdev_ops; |
|---|
| 280 | + int cpu = raw_smp_processor_id(); |
|---|
| 289 | 281 | u16 queue_index; |
|---|
| 290 | 282 | |
|---|
| 283 | +#ifdef CONFIG_XPS |
|---|
| 284 | + skb->sender_cpu = cpu + 1; |
|---|
| 285 | +#endif |
|---|
| 286 | + skb_record_rx_queue(skb, cpu % dev->real_num_tx_queues); |
|---|
| 291 | 287 | if (ops->ndo_select_queue) { |
|---|
| 292 | | - queue_index = ops->ndo_select_queue(dev, skb, NULL, |
|---|
| 293 | | - __packet_pick_tx_queue); |
|---|
| 288 | + queue_index = ops->ndo_select_queue(dev, skb, NULL); |
|---|
| 294 | 289 | queue_index = netdev_cap_txqueue(dev, queue_index); |
|---|
| 295 | 290 | } else { |
|---|
| 296 | | - queue_index = __packet_pick_tx_queue(dev, skb, NULL); |
|---|
| 291 | + queue_index = netdev_pick_tx(dev, skb, NULL); |
|---|
| 297 | 292 | } |
|---|
| 298 | 293 | |
|---|
| 299 | 294 | return queue_index; |
|---|
| .. | .. |
|---|
| 371 | 366 | { |
|---|
| 372 | 367 | union tpacket_uhdr h; |
|---|
| 373 | 368 | |
|---|
| 369 | + /* WRITE_ONCE() are paired with READ_ONCE() in __packet_get_status */ |
|---|
| 370 | + |
|---|
| 374 | 371 | h.raw = frame; |
|---|
| 375 | 372 | switch (po->tp_version) { |
|---|
| 376 | 373 | case TPACKET_V1: |
|---|
| 377 | | - h.h1->tp_status = status; |
|---|
| 374 | + WRITE_ONCE(h.h1->tp_status, status); |
|---|
| 378 | 375 | flush_dcache_page(pgv_to_page(&h.h1->tp_status)); |
|---|
| 379 | 376 | break; |
|---|
| 380 | 377 | case TPACKET_V2: |
|---|
| 381 | | - h.h2->tp_status = status; |
|---|
| 378 | + WRITE_ONCE(h.h2->tp_status, status); |
|---|
| 382 | 379 | flush_dcache_page(pgv_to_page(&h.h2->tp_status)); |
|---|
| 383 | 380 | break; |
|---|
| 384 | 381 | case TPACKET_V3: |
|---|
| 385 | | - h.h3->tp_status = status; |
|---|
| 382 | + WRITE_ONCE(h.h3->tp_status, status); |
|---|
| 386 | 383 | flush_dcache_page(pgv_to_page(&h.h3->tp_status)); |
|---|
| 387 | 384 | break; |
|---|
| 388 | 385 | default: |
|---|
| .. | .. |
|---|
| 393 | 390 | smp_wmb(); |
|---|
| 394 | 391 | } |
|---|
| 395 | 392 | |
|---|
| 396 | | -static int __packet_get_status(struct packet_sock *po, void *frame) |
|---|
| 393 | +static int __packet_get_status(const struct packet_sock *po, void *frame) |
|---|
| 397 | 394 | { |
|---|
| 398 | 395 | union tpacket_uhdr h; |
|---|
| 399 | 396 | |
|---|
| 400 | 397 | smp_rmb(); |
|---|
| 401 | 398 | |
|---|
| 399 | + /* READ_ONCE() are paired with WRITE_ONCE() in __packet_set_status */ |
|---|
| 400 | + |
|---|
| 402 | 401 | h.raw = frame; |
|---|
| 403 | 402 | switch (po->tp_version) { |
|---|
| 404 | 403 | case TPACKET_V1: |
|---|
| 405 | 404 | flush_dcache_page(pgv_to_page(&h.h1->tp_status)); |
|---|
| 406 | | - return h.h1->tp_status; |
|---|
| 405 | + return READ_ONCE(h.h1->tp_status); |
|---|
| 407 | 406 | case TPACKET_V2: |
|---|
| 408 | 407 | flush_dcache_page(pgv_to_page(&h.h2->tp_status)); |
|---|
| 409 | | - return h.h2->tp_status; |
|---|
| 408 | + return READ_ONCE(h.h2->tp_status); |
|---|
| 410 | 409 | case TPACKET_V3: |
|---|
| 411 | 410 | flush_dcache_page(pgv_to_page(&h.h3->tp_status)); |
|---|
| 412 | | - return h.h3->tp_status; |
|---|
| 411 | + return READ_ONCE(h.h3->tp_status); |
|---|
| 413 | 412 | default: |
|---|
| 414 | 413 | WARN(1, "TPACKET version not supported.\n"); |
|---|
| 415 | 414 | BUG(); |
|---|
| .. | .. |
|---|
| 417 | 416 | } |
|---|
| 418 | 417 | } |
|---|
| 419 | 418 | |
|---|
| 420 | | -static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts, |
|---|
| 419 | +static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec64 *ts, |
|---|
| 421 | 420 | unsigned int flags) |
|---|
| 422 | 421 | { |
|---|
| 423 | 422 | struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); |
|---|
| 424 | 423 | |
|---|
| 425 | 424 | if (shhwtstamps && |
|---|
| 426 | 425 | (flags & SOF_TIMESTAMPING_RAW_HARDWARE) && |
|---|
| 427 | | - ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts)) |
|---|
| 426 | + ktime_to_timespec64_cond(shhwtstamps->hwtstamp, ts)) |
|---|
| 428 | 427 | return TP_STATUS_TS_RAW_HARDWARE; |
|---|
| 429 | 428 | |
|---|
| 430 | | - if (ktime_to_timespec_cond(skb->tstamp, ts)) |
|---|
| 429 | + if ((flags & SOF_TIMESTAMPING_SOFTWARE) && |
|---|
| 430 | + ktime_to_timespec64_cond(skb->tstamp, ts)) |
|---|
| 431 | 431 | return TP_STATUS_TS_SOFTWARE; |
|---|
| 432 | 432 | |
|---|
| 433 | 433 | return 0; |
|---|
| .. | .. |
|---|
| 437 | 437 | struct sk_buff *skb) |
|---|
| 438 | 438 | { |
|---|
| 439 | 439 | union tpacket_uhdr h; |
|---|
| 440 | | - struct timespec ts; |
|---|
| 440 | + struct timespec64 ts; |
|---|
| 441 | 441 | __u32 ts_status; |
|---|
| 442 | 442 | |
|---|
| 443 | 443 | if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp))) |
|---|
| 444 | 444 | return 0; |
|---|
| 445 | 445 | |
|---|
| 446 | 446 | h.raw = frame; |
|---|
| 447 | + /* |
|---|
| 448 | + * versions 1 through 3 overflow the timestamps in y2106, since they |
|---|
| 449 | + * all store the seconds in a 32-bit unsigned integer. |
|---|
| 450 | + * If we create a version 4, that should have a 64-bit timestamp, |
|---|
| 451 | + * either 64-bit seconds + 32-bit nanoseconds, or just 64-bit |
|---|
| 452 | + * nanoseconds. |
|---|
| 453 | + */ |
|---|
| 447 | 454 | switch (po->tp_version) { |
|---|
| 448 | 455 | case TPACKET_V1: |
|---|
| 449 | 456 | h.h1->tp_sec = ts.tv_sec; |
|---|
| .. | .. |
|---|
| 469 | 476 | return ts_status; |
|---|
| 470 | 477 | } |
|---|
| 471 | 478 | |
|---|
| 472 | | -static void *packet_lookup_frame(struct packet_sock *po, |
|---|
| 473 | | - struct packet_ring_buffer *rb, |
|---|
| 474 | | - unsigned int position, |
|---|
| 475 | | - int status) |
|---|
| 479 | +static void *packet_lookup_frame(const struct packet_sock *po, |
|---|
| 480 | + const struct packet_ring_buffer *rb, |
|---|
| 481 | + unsigned int position, |
|---|
| 482 | + int status) |
|---|
| 476 | 483 | { |
|---|
| 477 | 484 | unsigned int pg_vec_pos, frame_offset; |
|---|
| 478 | 485 | union tpacket_uhdr h; |
|---|
| .. | .. |
|---|
| 529 | 536 | int blk_size_in_bytes) |
|---|
| 530 | 537 | { |
|---|
| 531 | 538 | struct net_device *dev; |
|---|
| 532 | | - unsigned int mbits = 0, msec = 0, div = 0, tmo = 0; |
|---|
| 539 | + unsigned int mbits, div; |
|---|
| 533 | 540 | struct ethtool_link_ksettings ecmd; |
|---|
| 534 | 541 | int err; |
|---|
| 535 | 542 | |
|---|
| .. | .. |
|---|
| 541 | 548 | } |
|---|
| 542 | 549 | err = __ethtool_get_link_ksettings(dev, &ecmd); |
|---|
| 543 | 550 | rtnl_unlock(); |
|---|
| 544 | | - if (!err) { |
|---|
| 545 | | - /* |
|---|
| 546 | | - * If the link speed is so slow you don't really |
|---|
| 547 | | - * need to worry about perf anyways |
|---|
| 548 | | - */ |
|---|
| 549 | | - if (ecmd.base.speed < SPEED_1000 || |
|---|
| 550 | | - ecmd.base.speed == SPEED_UNKNOWN) { |
|---|
| 551 | | - return DEFAULT_PRB_RETIRE_TOV; |
|---|
| 552 | | - } else { |
|---|
| 553 | | - msec = 1; |
|---|
| 554 | | - div = ecmd.base.speed / 1000; |
|---|
| 555 | | - } |
|---|
| 556 | | - } else |
|---|
| 551 | + if (err) |
|---|
| 557 | 552 | return DEFAULT_PRB_RETIRE_TOV; |
|---|
| 558 | 553 | |
|---|
| 554 | + /* If the link speed is so slow you don't really |
|---|
| 555 | + * need to worry about perf anyways |
|---|
| 556 | + */ |
|---|
| 557 | + if (ecmd.base.speed < SPEED_1000 || |
|---|
| 558 | + ecmd.base.speed == SPEED_UNKNOWN) |
|---|
| 559 | + return DEFAULT_PRB_RETIRE_TOV; |
|---|
| 560 | + |
|---|
| 561 | + div = ecmd.base.speed / 1000; |
|---|
| 559 | 562 | mbits = (blk_size_in_bytes * 8) / (1024 * 1024); |
|---|
| 560 | 563 | |
|---|
| 561 | 564 | if (div) |
|---|
| 562 | 565 | mbits /= div; |
|---|
| 563 | 566 | |
|---|
| 564 | | - tmo = mbits * msec; |
|---|
| 565 | | - |
|---|
| 566 | 567 | if (div) |
|---|
| 567 | | - return tmo+1; |
|---|
| 568 | | - return tmo; |
|---|
| 568 | + return mbits + 1; |
|---|
| 569 | + return mbits; |
|---|
| 569 | 570 | } |
|---|
| 570 | 571 | |
|---|
| 571 | 572 | static void prb_init_ft_ops(struct tpacket_kbdq_core *p1, |
|---|
| .. | .. |
|---|
| 601 | 602 | req_u->req3.tp_block_size); |
|---|
| 602 | 603 | p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov); |
|---|
| 603 | 604 | p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv; |
|---|
| 605 | + rwlock_init(&p1->blk_fill_in_prog_lock); |
|---|
| 604 | 606 | |
|---|
| 605 | 607 | p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv); |
|---|
| 606 | 608 | prb_init_ft_ops(p1, req_u); |
|---|
| .. | .. |
|---|
| 667 | 669 | * |
|---|
| 668 | 670 | */ |
|---|
| 669 | 671 | if (BLOCK_NUM_PKTS(pbd)) { |
|---|
| 670 | | - while (atomic_read(&pkc->blk_fill_in_prog)) { |
|---|
| 671 | | - /* Waiting for skb_copy_bits to finish... */ |
|---|
| 672 | | - cpu_chill(); |
|---|
| 673 | | - } |
|---|
| 672 | + /* Waiting for skb_copy_bits to finish... */ |
|---|
| 673 | + write_lock(&pkc->blk_fill_in_prog_lock); |
|---|
| 674 | + write_unlock(&pkc->blk_fill_in_prog_lock); |
|---|
| 674 | 675 | } |
|---|
| 675 | 676 | |
|---|
| 676 | 677 | if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) { |
|---|
| .. | .. |
|---|
| 768 | 769 | struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1; |
|---|
| 769 | 770 | struct sock *sk = &po->sk; |
|---|
| 770 | 771 | |
|---|
| 771 | | - if (po->stats.stats3.tp_drops) |
|---|
| 772 | + if (atomic_read(&po->tp_drops)) |
|---|
| 772 | 773 | status |= TP_STATUS_LOSING; |
|---|
| 773 | 774 | |
|---|
| 774 | 775 | last_pkt = (struct tpacket3_hdr *)pkc1->prev; |
|---|
| .. | .. |
|---|
| 784 | 785 | * It shouldn't really happen as we don't close empty |
|---|
| 785 | 786 | * blocks. See prb_retire_rx_blk_timer_expired(). |
|---|
| 786 | 787 | */ |
|---|
| 787 | | - struct timespec ts; |
|---|
| 788 | | - getnstimeofday(&ts); |
|---|
| 788 | + struct timespec64 ts; |
|---|
| 789 | + ktime_get_real_ts64(&ts); |
|---|
| 789 | 790 | h1->ts_last_pkt.ts_sec = ts.tv_sec; |
|---|
| 790 | 791 | h1->ts_last_pkt.ts_nsec = ts.tv_nsec; |
|---|
| 791 | 792 | } |
|---|
| .. | .. |
|---|
| 815 | 816 | static void prb_open_block(struct tpacket_kbdq_core *pkc1, |
|---|
| 816 | 817 | struct tpacket_block_desc *pbd1) |
|---|
| 817 | 818 | { |
|---|
| 818 | | - struct timespec ts; |
|---|
| 819 | + struct timespec64 ts; |
|---|
| 819 | 820 | struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1; |
|---|
| 820 | 821 | |
|---|
| 821 | 822 | smp_rmb(); |
|---|
| .. | .. |
|---|
| 828 | 829 | BLOCK_NUM_PKTS(pbd1) = 0; |
|---|
| 829 | 830 | BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); |
|---|
| 830 | 831 | |
|---|
| 831 | | - getnstimeofday(&ts); |
|---|
| 832 | + ktime_get_real_ts64(&ts); |
|---|
| 832 | 833 | |
|---|
| 833 | 834 | h1->ts_first_pkt.ts_sec = ts.tv_sec; |
|---|
| 834 | 835 | h1->ts_first_pkt.ts_nsec = ts.tv_nsec; |
|---|
| .. | .. |
|---|
| 929 | 930 | * the timer-handler already handled this case. |
|---|
| 930 | 931 | */ |
|---|
| 931 | 932 | if (!(status & TP_STATUS_BLK_TMO)) { |
|---|
| 932 | | - while (atomic_read(&pkc->blk_fill_in_prog)) { |
|---|
| 933 | | - /* Waiting for skb_copy_bits to finish... */ |
|---|
| 934 | | - cpu_chill(); |
|---|
| 935 | | - } |
|---|
| 933 | + /* Waiting for skb_copy_bits to finish... */ |
|---|
| 934 | + write_lock(&pkc->blk_fill_in_prog_lock); |
|---|
| 935 | + write_unlock(&pkc->blk_fill_in_prog_lock); |
|---|
| 936 | 936 | } |
|---|
| 937 | 937 | prb_close_block(pkc, pbd, po, status); |
|---|
| 938 | 938 | return; |
|---|
| .. | .. |
|---|
| 953 | 953 | __releases(&pkc->blk_fill_in_prog_lock) |
|---|
| 954 | 954 | { |
|---|
| 955 | 955 | struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb); |
|---|
| 956 | | - atomic_dec(&pkc->blk_fill_in_prog); |
|---|
| 956 | + |
|---|
| 957 | + read_unlock(&pkc->blk_fill_in_prog_lock); |
|---|
| 957 | 958 | } |
|---|
| 958 | 959 | |
|---|
| 959 | 960 | static void prb_fill_rxhash(struct tpacket_kbdq_core *pkc, |
|---|
| .. | .. |
|---|
| 1008 | 1009 | pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len); |
|---|
| 1009 | 1010 | BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len); |
|---|
| 1010 | 1011 | BLOCK_NUM_PKTS(pbd) += 1; |
|---|
| 1011 | | - atomic_inc(&pkc->blk_fill_in_prog); |
|---|
| 1012 | + read_lock(&pkc->blk_fill_in_prog_lock); |
|---|
| 1012 | 1013 | prb_run_all_ft_ops(pkc, ppd); |
|---|
| 1013 | 1014 | } |
|---|
| 1014 | 1015 | |
|---|
| 1015 | 1016 | /* Assumes caller has the sk->rx_queue.lock */ |
|---|
| 1016 | 1017 | static void *__packet_lookup_frame_in_block(struct packet_sock *po, |
|---|
| 1017 | 1018 | struct sk_buff *skb, |
|---|
| 1018 | | - int status, |
|---|
| 1019 | 1019 | unsigned int len |
|---|
| 1020 | 1020 | ) |
|---|
| 1021 | 1021 | { |
|---|
| .. | .. |
|---|
| 1087 | 1087 | po->rx_ring.head, status); |
|---|
| 1088 | 1088 | return curr; |
|---|
| 1089 | 1089 | case TPACKET_V3: |
|---|
| 1090 | | - return __packet_lookup_frame_in_block(po, skb, status, len); |
|---|
| 1090 | + return __packet_lookup_frame_in_block(po, skb, len); |
|---|
| 1091 | 1091 | default: |
|---|
| 1092 | 1092 | WARN(1, "TPACKET version not supported\n"); |
|---|
| 1093 | 1093 | BUG(); |
|---|
| .. | .. |
|---|
| 1095 | 1095 | } |
|---|
| 1096 | 1096 | } |
|---|
| 1097 | 1097 | |
|---|
| 1098 | | -static void *prb_lookup_block(struct packet_sock *po, |
|---|
| 1099 | | - struct packet_ring_buffer *rb, |
|---|
| 1100 | | - unsigned int idx, |
|---|
| 1101 | | - int status) |
|---|
| 1098 | +static void *prb_lookup_block(const struct packet_sock *po, |
|---|
| 1099 | + const struct packet_ring_buffer *rb, |
|---|
| 1100 | + unsigned int idx, |
|---|
| 1101 | + int status) |
|---|
| 1102 | 1102 | { |
|---|
| 1103 | 1103 | struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb); |
|---|
| 1104 | 1104 | struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, idx); |
|---|
| .. | .. |
|---|
| 1211 | 1211 | #define ROOM_LOW 0x1 |
|---|
| 1212 | 1212 | #define ROOM_NORMAL 0x2 |
|---|
| 1213 | 1213 | |
|---|
| 1214 | | -static bool __tpacket_has_room(struct packet_sock *po, int pow_off) |
|---|
| 1214 | +static bool __tpacket_has_room(const struct packet_sock *po, int pow_off) |
|---|
| 1215 | 1215 | { |
|---|
| 1216 | 1216 | int idx, len; |
|---|
| 1217 | 1217 | |
|---|
| 1218 | | - len = po->rx_ring.frame_max + 1; |
|---|
| 1219 | | - idx = po->rx_ring.head; |
|---|
| 1218 | + len = READ_ONCE(po->rx_ring.frame_max) + 1; |
|---|
| 1219 | + idx = READ_ONCE(po->rx_ring.head); |
|---|
| 1220 | 1220 | if (pow_off) |
|---|
| 1221 | 1221 | idx += len >> pow_off; |
|---|
| 1222 | 1222 | if (idx >= len) |
|---|
| .. | .. |
|---|
| 1224 | 1224 | return packet_lookup_frame(po, &po->rx_ring, idx, TP_STATUS_KERNEL); |
|---|
| 1225 | 1225 | } |
|---|
| 1226 | 1226 | |
|---|
| 1227 | | -static bool __tpacket_v3_has_room(struct packet_sock *po, int pow_off) |
|---|
| 1227 | +static bool __tpacket_v3_has_room(const struct packet_sock *po, int pow_off) |
|---|
| 1228 | 1228 | { |
|---|
| 1229 | 1229 | int idx, len; |
|---|
| 1230 | 1230 | |
|---|
| 1231 | | - len = po->rx_ring.prb_bdqc.knum_blocks; |
|---|
| 1232 | | - idx = po->rx_ring.prb_bdqc.kactive_blk_num; |
|---|
| 1231 | + len = READ_ONCE(po->rx_ring.prb_bdqc.knum_blocks); |
|---|
| 1232 | + idx = READ_ONCE(po->rx_ring.prb_bdqc.kactive_blk_num); |
|---|
| 1233 | 1233 | if (pow_off) |
|---|
| 1234 | 1234 | idx += len >> pow_off; |
|---|
| 1235 | 1235 | if (idx >= len) |
|---|
| .. | .. |
|---|
| 1237 | 1237 | return prb_lookup_block(po, &po->rx_ring, idx, TP_STATUS_KERNEL); |
|---|
| 1238 | 1238 | } |
|---|
| 1239 | 1239 | |
|---|
| 1240 | | -static int __packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb) |
|---|
| 1240 | +static int __packet_rcv_has_room(const struct packet_sock *po, |
|---|
| 1241 | + const struct sk_buff *skb) |
|---|
| 1241 | 1242 | { |
|---|
| 1242 | | - struct sock *sk = &po->sk; |
|---|
| 1243 | + const struct sock *sk = &po->sk; |
|---|
| 1243 | 1244 | int ret = ROOM_NONE; |
|---|
| 1244 | 1245 | |
|---|
| 1245 | 1246 | if (po->prot_hook.func != tpacket_rcv) { |
|---|
| 1246 | | - int avail = sk->sk_rcvbuf - atomic_read(&sk->sk_rmem_alloc) |
|---|
| 1247 | | - - (skb ? skb->truesize : 0); |
|---|
| 1248 | | - if (avail > (sk->sk_rcvbuf >> ROOM_POW_OFF)) |
|---|
| 1247 | + int rcvbuf = READ_ONCE(sk->sk_rcvbuf); |
|---|
| 1248 | + int avail = rcvbuf - atomic_read(&sk->sk_rmem_alloc) |
|---|
| 1249 | + - (skb ? skb->truesize : 0); |
|---|
| 1250 | + |
|---|
| 1251 | + if (avail > (rcvbuf >> ROOM_POW_OFF)) |
|---|
| 1249 | 1252 | return ROOM_NORMAL; |
|---|
| 1250 | 1253 | else if (avail > 0) |
|---|
| 1251 | 1254 | return ROOM_LOW; |
|---|
| .. | .. |
|---|
| 1270 | 1273 | |
|---|
| 1271 | 1274 | static int packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb) |
|---|
| 1272 | 1275 | { |
|---|
| 1273 | | - int ret; |
|---|
| 1274 | | - bool has_room; |
|---|
| 1276 | + int pressure, ret; |
|---|
| 1275 | 1277 | |
|---|
| 1276 | | - spin_lock_bh(&po->sk.sk_receive_queue.lock); |
|---|
| 1277 | 1278 | ret = __packet_rcv_has_room(po, skb); |
|---|
| 1278 | | - has_room = ret == ROOM_NORMAL; |
|---|
| 1279 | | - if (po->pressure == has_room) |
|---|
| 1280 | | - po->pressure = !has_room; |
|---|
| 1281 | | - spin_unlock_bh(&po->sk.sk_receive_queue.lock); |
|---|
| 1279 | + pressure = ret != ROOM_NORMAL; |
|---|
| 1280 | + |
|---|
| 1281 | + if (READ_ONCE(po->pressure) != pressure) |
|---|
| 1282 | + WRITE_ONCE(po->pressure, pressure); |
|---|
| 1282 | 1283 | |
|---|
| 1283 | 1284 | return ret; |
|---|
| 1285 | +} |
|---|
| 1286 | + |
|---|
| 1287 | +static void packet_rcv_try_clear_pressure(struct packet_sock *po) |
|---|
| 1288 | +{ |
|---|
| 1289 | + if (READ_ONCE(po->pressure) && |
|---|
| 1290 | + __packet_rcv_has_room(po, NULL) == ROOM_NORMAL) |
|---|
| 1291 | + WRITE_ONCE(po->pressure, 0); |
|---|
| 1284 | 1292 | } |
|---|
| 1285 | 1293 | |
|---|
| 1286 | 1294 | static void packet_sock_destruct(struct sock *sk) |
|---|
| .. | .. |
|---|
| 1356 | 1364 | struct packet_sock *po, *po_next, *po_skip = NULL; |
|---|
| 1357 | 1365 | unsigned int i, j, room = ROOM_NONE; |
|---|
| 1358 | 1366 | |
|---|
| 1359 | | - po = pkt_sk(f->arr[idx]); |
|---|
| 1367 | + po = pkt_sk(rcu_dereference(f->arr[idx])); |
|---|
| 1360 | 1368 | |
|---|
| 1361 | 1369 | if (try_self) { |
|---|
| 1362 | 1370 | room = packet_rcv_has_room(po, skb); |
|---|
| .. | .. |
|---|
| 1368 | 1376 | |
|---|
| 1369 | 1377 | i = j = min_t(int, po->rollover->sock, num - 1); |
|---|
| 1370 | 1378 | do { |
|---|
| 1371 | | - po_next = pkt_sk(f->arr[i]); |
|---|
| 1372 | | - if (po_next != po_skip && !po_next->pressure && |
|---|
| 1379 | + po_next = pkt_sk(rcu_dereference(f->arr[i])); |
|---|
| 1380 | + if (po_next != po_skip && !READ_ONCE(po_next->pressure) && |
|---|
| 1373 | 1381 | packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) { |
|---|
| 1374 | 1382 | if (i != j) |
|---|
| 1375 | 1383 | po->rollover->sock = i; |
|---|
| .. | .. |
|---|
| 1463 | 1471 | if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER)) |
|---|
| 1464 | 1472 | idx = fanout_demux_rollover(f, skb, idx, true, num); |
|---|
| 1465 | 1473 | |
|---|
| 1466 | | - po = pkt_sk(f->arr[idx]); |
|---|
| 1474 | + po = pkt_sk(rcu_dereference(f->arr[idx])); |
|---|
| 1467 | 1475 | return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev); |
|---|
| 1468 | 1476 | } |
|---|
| 1469 | 1477 | |
|---|
| .. | .. |
|---|
| 1477 | 1485 | struct packet_fanout *f = po->fanout; |
|---|
| 1478 | 1486 | |
|---|
| 1479 | 1487 | spin_lock(&f->lock); |
|---|
| 1480 | | - f->arr[f->num_members] = sk; |
|---|
| 1488 | + rcu_assign_pointer(f->arr[f->num_members], sk); |
|---|
| 1481 | 1489 | smp_wmb(); |
|---|
| 1482 | 1490 | f->num_members++; |
|---|
| 1483 | 1491 | if (f->num_members == 1) |
|---|
| .. | .. |
|---|
| 1492 | 1500 | |
|---|
| 1493 | 1501 | spin_lock(&f->lock); |
|---|
| 1494 | 1502 | for (i = 0; i < f->num_members; i++) { |
|---|
| 1495 | | - if (f->arr[i] == sk) |
|---|
| 1503 | + if (rcu_dereference_protected(f->arr[i], |
|---|
| 1504 | + lockdep_is_held(&f->lock)) == sk) |
|---|
| 1496 | 1505 | break; |
|---|
| 1497 | 1506 | } |
|---|
| 1498 | 1507 | BUG_ON(i >= f->num_members); |
|---|
| 1499 | | - f->arr[i] = f->arr[f->num_members - 1]; |
|---|
| 1508 | + rcu_assign_pointer(f->arr[i], |
|---|
| 1509 | + rcu_dereference_protected(f->arr[f->num_members - 1], |
|---|
| 1510 | + lockdep_is_held(&f->lock))); |
|---|
| 1500 | 1511 | f->num_members--; |
|---|
| 1501 | 1512 | if (f->num_members == 0) |
|---|
| 1502 | 1513 | __dev_remove_pack(&f->prot_hook); |
|---|
| .. | .. |
|---|
| 1539 | 1550 | } |
|---|
| 1540 | 1551 | } |
|---|
| 1541 | 1552 | |
|---|
| 1542 | | -static int fanout_set_data_cbpf(struct packet_sock *po, char __user *data, |
|---|
| 1553 | +static int fanout_set_data_cbpf(struct packet_sock *po, sockptr_t data, |
|---|
| 1543 | 1554 | unsigned int len) |
|---|
| 1544 | 1555 | { |
|---|
| 1545 | 1556 | struct bpf_prog *new; |
|---|
| .. | .. |
|---|
| 1548 | 1559 | |
|---|
| 1549 | 1560 | if (sock_flag(&po->sk, SOCK_FILTER_LOCKED)) |
|---|
| 1550 | 1561 | return -EPERM; |
|---|
| 1551 | | - if (len != sizeof(fprog)) |
|---|
| 1552 | | - return -EINVAL; |
|---|
| 1553 | | - if (copy_from_user(&fprog, data, len)) |
|---|
| 1554 | | - return -EFAULT; |
|---|
| 1562 | + |
|---|
| 1563 | + ret = copy_bpf_fprog_from_user(&fprog, data, len); |
|---|
| 1564 | + if (ret) |
|---|
| 1565 | + return ret; |
|---|
| 1555 | 1566 | |
|---|
| 1556 | 1567 | ret = bpf_prog_create_from_user(&new, &fprog, NULL, false); |
|---|
| 1557 | 1568 | if (ret) |
|---|
| .. | .. |
|---|
| 1561 | 1572 | return 0; |
|---|
| 1562 | 1573 | } |
|---|
| 1563 | 1574 | |
|---|
| 1564 | | -static int fanout_set_data_ebpf(struct packet_sock *po, char __user *data, |
|---|
| 1575 | +static int fanout_set_data_ebpf(struct packet_sock *po, sockptr_t data, |
|---|
| 1565 | 1576 | unsigned int len) |
|---|
| 1566 | 1577 | { |
|---|
| 1567 | 1578 | struct bpf_prog *new; |
|---|
| .. | .. |
|---|
| 1571 | 1582 | return -EPERM; |
|---|
| 1572 | 1583 | if (len != sizeof(fd)) |
|---|
| 1573 | 1584 | return -EINVAL; |
|---|
| 1574 | | - if (copy_from_user(&fd, data, len)) |
|---|
| 1585 | + if (copy_from_sockptr(&fd, data, len)) |
|---|
| 1575 | 1586 | return -EFAULT; |
|---|
| 1576 | 1587 | |
|---|
| 1577 | 1588 | new = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER); |
|---|
| .. | .. |
|---|
| 1582 | 1593 | return 0; |
|---|
| 1583 | 1594 | } |
|---|
| 1584 | 1595 | |
|---|
| 1585 | | -static int fanout_set_data(struct packet_sock *po, char __user *data, |
|---|
| 1596 | +static int fanout_set_data(struct packet_sock *po, sockptr_t data, |
|---|
| 1586 | 1597 | unsigned int len) |
|---|
| 1587 | 1598 | { |
|---|
| 1588 | 1599 | switch (po->fanout->type) { |
|---|
| .. | .. |
|---|
| 1634 | 1645 | return false; |
|---|
| 1635 | 1646 | } |
|---|
| 1636 | 1647 | |
|---|
| 1637 | | -static int fanout_add(struct sock *sk, u16 id, u16 type_flags) |
|---|
| 1648 | +static int fanout_add(struct sock *sk, struct fanout_args *args) |
|---|
| 1638 | 1649 | { |
|---|
| 1639 | 1650 | struct packet_rollover *rollover = NULL; |
|---|
| 1640 | 1651 | struct packet_sock *po = pkt_sk(sk); |
|---|
| 1652 | + u16 type_flags = args->type_flags; |
|---|
| 1641 | 1653 | struct packet_fanout *f, *match; |
|---|
| 1642 | 1654 | u8 type = type_flags & 0xff; |
|---|
| 1643 | 1655 | u8 flags = type_flags >> 8; |
|---|
| 1656 | + u16 id = args->id; |
|---|
| 1644 | 1657 | int err; |
|---|
| 1645 | 1658 | |
|---|
| 1646 | 1659 | switch (type) { |
|---|
| .. | .. |
|---|
| 1698 | 1711 | } |
|---|
| 1699 | 1712 | } |
|---|
| 1700 | 1713 | err = -EINVAL; |
|---|
| 1701 | | - if (match && match->flags != flags) |
|---|
| 1702 | | - goto out; |
|---|
| 1703 | | - if (!match) { |
|---|
| 1714 | + if (match) { |
|---|
| 1715 | + if (match->flags != flags) |
|---|
| 1716 | + goto out; |
|---|
| 1717 | + if (args->max_num_members && |
|---|
| 1718 | + args->max_num_members != match->max_num_members) |
|---|
| 1719 | + goto out; |
|---|
| 1720 | + } else { |
|---|
| 1721 | + if (args->max_num_members > PACKET_FANOUT_MAX) |
|---|
| 1722 | + goto out; |
|---|
| 1723 | + if (!args->max_num_members) |
|---|
| 1724 | + /* legacy PACKET_FANOUT_MAX */ |
|---|
| 1725 | + args->max_num_members = 256; |
|---|
| 1704 | 1726 | err = -ENOMEM; |
|---|
| 1705 | | - match = kzalloc(sizeof(*match), GFP_KERNEL); |
|---|
| 1727 | + match = kvzalloc(struct_size(match, arr, args->max_num_members), |
|---|
| 1728 | + GFP_KERNEL); |
|---|
| 1706 | 1729 | if (!match) |
|---|
| 1707 | 1730 | goto out; |
|---|
| 1708 | 1731 | write_pnet(&match->net, sock_net(sk)); |
|---|
| .. | .. |
|---|
| 1719 | 1742 | match->prot_hook.af_packet_priv = match; |
|---|
| 1720 | 1743 | match->prot_hook.af_packet_net = read_pnet(&match->net); |
|---|
| 1721 | 1744 | match->prot_hook.id_match = match_fanout_group; |
|---|
| 1745 | + match->max_num_members = args->max_num_members; |
|---|
| 1722 | 1746 | list_add(&match->list, &fanout_list); |
|---|
| 1723 | 1747 | } |
|---|
| 1724 | 1748 | err = -EINVAL; |
|---|
| .. | .. |
|---|
| 1729 | 1753 | match->prot_hook.type == po->prot_hook.type && |
|---|
| 1730 | 1754 | match->prot_hook.dev == po->prot_hook.dev) { |
|---|
| 1731 | 1755 | err = -ENOSPC; |
|---|
| 1732 | | - if (refcount_read(&match->sk_ref) < PACKET_FANOUT_MAX) { |
|---|
| 1756 | + if (refcount_read(&match->sk_ref) < match->max_num_members) { |
|---|
| 1733 | 1757 | __dev_remove_pack(&po->prot_hook); |
|---|
| 1734 | 1758 | |
|---|
| 1735 | 1759 | /* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */ |
|---|
| .. | .. |
|---|
| 1746 | 1770 | |
|---|
| 1747 | 1771 | if (err && !refcount_read(&match->sk_ref)) { |
|---|
| 1748 | 1772 | list_del(&match->list); |
|---|
| 1749 | | - kfree(match); |
|---|
| 1773 | + kvfree(match); |
|---|
| 1750 | 1774 | } |
|---|
| 1751 | 1775 | |
|---|
| 1752 | 1776 | out: |
|---|
| .. | .. |
|---|
| 1836 | 1860 | skb_dst_drop(skb); |
|---|
| 1837 | 1861 | |
|---|
| 1838 | 1862 | /* drop conntrack reference */ |
|---|
| 1839 | | - nf_reset(skb); |
|---|
| 1863 | + nf_reset_ct(skb); |
|---|
| 1840 | 1864 | |
|---|
| 1841 | 1865 | spkt = &PACKET_SKB_CB(skb)->sa.pkt; |
|---|
| 1842 | 1866 | |
|---|
| .. | .. |
|---|
| 1864 | 1888 | return 0; |
|---|
| 1865 | 1889 | } |
|---|
| 1866 | 1890 | |
|---|
| 1891 | +static void packet_parse_headers(struct sk_buff *skb, struct socket *sock) |
|---|
| 1892 | +{ |
|---|
| 1893 | + int depth; |
|---|
| 1894 | + |
|---|
| 1895 | + if ((!skb->protocol || skb->protocol == htons(ETH_P_ALL)) && |
|---|
| 1896 | + sock->type == SOCK_RAW) { |
|---|
| 1897 | + skb_reset_mac_header(skb); |
|---|
| 1898 | + skb->protocol = dev_parse_header_protocol(skb); |
|---|
| 1899 | + } |
|---|
| 1900 | + |
|---|
| 1901 | + /* Move network header to the right position for VLAN tagged packets */ |
|---|
| 1902 | + if (likely(skb->dev->type == ARPHRD_ETHER) && |
|---|
| 1903 | + eth_type_vlan(skb->protocol) && |
|---|
| 1904 | + vlan_get_protocol_and_depth(skb, skb->protocol, &depth) != 0) |
|---|
| 1905 | + skb_set_network_header(skb, depth); |
|---|
| 1906 | + |
|---|
| 1907 | + skb_probe_transport_header(skb); |
|---|
| 1908 | +} |
|---|
| 1867 | 1909 | |
|---|
| 1868 | 1910 | /* |
|---|
| 1869 | 1911 | * Output a raw packet to a device layer. This bypasses all the other |
|---|
| .. | .. |
|---|
| 1956 | 1998 | goto retry; |
|---|
| 1957 | 1999 | } |
|---|
| 1958 | 2000 | |
|---|
| 1959 | | - if (!dev_validate_header(dev, skb->data, len)) { |
|---|
| 2001 | + if (!dev_validate_header(dev, skb->data, len) || !skb->len) { |
|---|
| 1960 | 2002 | err = -EINVAL; |
|---|
| 1961 | 2003 | goto out_unlock; |
|---|
| 1962 | 2004 | } |
|---|
| .. | .. |
|---|
| 1979 | 2021 | skb->mark = sk->sk_mark; |
|---|
| 1980 | 2022 | skb->tstamp = sockc.transmit_time; |
|---|
| 1981 | 2023 | |
|---|
| 1982 | | - sock_tx_timestamp(sk, sockc.tsflags, &skb_shinfo(skb)->tx_flags); |
|---|
| 2024 | + skb_setup_tx_timestamp(skb, sockc.tsflags); |
|---|
| 1983 | 2025 | |
|---|
| 1984 | 2026 | if (unlikely(extra_len == 4)) |
|---|
| 1985 | 2027 | skb->no_fcs = 1; |
|---|
| 1986 | 2028 | |
|---|
| 1987 | | - skb_probe_transport_header(skb, 0); |
|---|
| 2029 | + packet_parse_headers(skb, sock); |
|---|
| 1988 | 2030 | |
|---|
| 1989 | 2031 | dev_queue_xmit(skb); |
|---|
| 1990 | 2032 | rcu_read_unlock(); |
|---|
| .. | .. |
|---|
| 2061 | 2103 | |
|---|
| 2062 | 2104 | skb->dev = dev; |
|---|
| 2063 | 2105 | |
|---|
| 2064 | | - if (dev->header_ops) { |
|---|
| 2106 | + if (dev_has_header(dev)) { |
|---|
| 2065 | 2107 | /* The device has an explicit notion of ll header, |
|---|
| 2066 | 2108 | * exported to higher levels. |
|---|
| 2067 | 2109 | * |
|---|
| .. | .. |
|---|
| 2106 | 2148 | sll = &PACKET_SKB_CB(skb)->sa.ll; |
|---|
| 2107 | 2149 | sll->sll_hatype = dev->type; |
|---|
| 2108 | 2150 | sll->sll_pkttype = skb->pkt_type; |
|---|
| 2109 | | - if (unlikely(po->origdev)) |
|---|
| 2151 | + if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV))) |
|---|
| 2110 | 2152 | sll->sll_ifindex = orig_dev->ifindex; |
|---|
| 2111 | 2153 | else |
|---|
| 2112 | 2154 | sll->sll_ifindex = dev->ifindex; |
|---|
| .. | .. |
|---|
| 2126 | 2168 | skb_dst_drop(skb); |
|---|
| 2127 | 2169 | |
|---|
| 2128 | 2170 | /* drop conntrack reference */ |
|---|
| 2129 | | - nf_reset(skb); |
|---|
| 2171 | + nf_reset_ct(skb); |
|---|
| 2130 | 2172 | |
|---|
| 2131 | 2173 | spin_lock(&sk->sk_receive_queue.lock); |
|---|
| 2132 | 2174 | po->stats.stats1.tp_packets++; |
|---|
| .. | .. |
|---|
| 2138 | 2180 | |
|---|
| 2139 | 2181 | drop_n_acct: |
|---|
| 2140 | 2182 | is_drop_n_account = true; |
|---|
| 2141 | | - spin_lock(&sk->sk_receive_queue.lock); |
|---|
| 2142 | | - po->stats.stats1.tp_drops++; |
|---|
| 2183 | + atomic_inc(&po->tp_drops); |
|---|
| 2143 | 2184 | atomic_inc(&sk->sk_drops); |
|---|
| 2144 | | - spin_unlock(&sk->sk_receive_queue.lock); |
|---|
| 2145 | 2185 | |
|---|
| 2146 | 2186 | drop_n_restore: |
|---|
| 2147 | 2187 | if (skb_head != skb->data && skb_shared(skb)) { |
|---|
| .. | .. |
|---|
| 2170 | 2210 | unsigned short macoff, hdrlen; |
|---|
| 2171 | 2211 | unsigned int netoff; |
|---|
| 2172 | 2212 | struct sk_buff *copy_skb = NULL; |
|---|
| 2173 | | - struct timespec ts; |
|---|
| 2213 | + struct timespec64 ts; |
|---|
| 2174 | 2214 | __u32 ts_status; |
|---|
| 2175 | 2215 | bool is_drop_n_account = false; |
|---|
| 2176 | 2216 | unsigned int slot_id = 0; |
|---|
| .. | .. |
|---|
| 2192 | 2232 | if (!net_eq(dev_net(dev), sock_net(sk))) |
|---|
| 2193 | 2233 | goto drop; |
|---|
| 2194 | 2234 | |
|---|
| 2195 | | - if (dev->header_ops) { |
|---|
| 2235 | + if (dev_has_header(dev)) { |
|---|
| 2196 | 2236 | if (sk->sk_type != SOCK_DGRAM) |
|---|
| 2197 | 2237 | skb_push(skb, skb->data - skb_mac_header(skb)); |
|---|
| 2198 | 2238 | else if (skb->pkt_type == PACKET_OUTGOING) { |
|---|
| .. | .. |
|---|
| 2207 | 2247 | if (!res) |
|---|
| 2208 | 2248 | goto drop_n_restore; |
|---|
| 2209 | 2249 | |
|---|
| 2250 | + /* If we are flooded, just give up */ |
|---|
| 2251 | + if (__packet_rcv_has_room(po, skb) == ROOM_NONE) { |
|---|
| 2252 | + atomic_inc(&po->tp_drops); |
|---|
| 2253 | + goto drop_n_restore; |
|---|
| 2254 | + } |
|---|
| 2255 | + |
|---|
| 2210 | 2256 | if (skb->ip_summed == CHECKSUM_PARTIAL) |
|---|
| 2211 | 2257 | status |= TP_STATUS_CSUMNOTREADY; |
|---|
| 2212 | 2258 | else if (skb->pkt_type != PACKET_OUTGOING && |
|---|
| 2213 | | - (skb->ip_summed == CHECKSUM_COMPLETE || |
|---|
| 2214 | | - skb_csum_unnecessary(skb))) |
|---|
| 2259 | + skb_csum_unnecessary(skb)) |
|---|
| 2215 | 2260 | status |= TP_STATUS_CSUM_VALID; |
|---|
| 2216 | 2261 | |
|---|
| 2217 | 2262 | if (snaplen > res) |
|---|
| .. | .. |
|---|
| 2232 | 2277 | macoff = netoff - maclen; |
|---|
| 2233 | 2278 | } |
|---|
| 2234 | 2279 | if (netoff > USHRT_MAX) { |
|---|
| 2235 | | - spin_lock(&sk->sk_receive_queue.lock); |
|---|
| 2236 | | - po->stats.stats1.tp_drops++; |
|---|
| 2237 | | - spin_unlock(&sk->sk_receive_queue.lock); |
|---|
| 2280 | + atomic_inc(&po->tp_drops); |
|---|
| 2238 | 2281 | goto drop_n_restore; |
|---|
| 2239 | 2282 | } |
|---|
| 2240 | 2283 | if (po->tp_version <= TPACKET_V2) { |
|---|
| .. | .. |
|---|
| 2247 | 2290 | copy_skb = skb_get(skb); |
|---|
| 2248 | 2291 | skb_head = skb->data; |
|---|
| 2249 | 2292 | } |
|---|
| 2250 | | - if (copy_skb) |
|---|
| 2293 | + if (copy_skb) { |
|---|
| 2294 | + memset(&PACKET_SKB_CB(copy_skb)->sa.ll, 0, |
|---|
| 2295 | + sizeof(PACKET_SKB_CB(copy_skb)->sa.ll)); |
|---|
| 2251 | 2296 | skb_set_owner_r(copy_skb, sk); |
|---|
| 2297 | + } |
|---|
| 2252 | 2298 | } |
|---|
| 2253 | 2299 | snaplen = po->rx_ring.frame_size - macoff; |
|---|
| 2254 | 2300 | if ((int)snaplen < 0) { |
|---|
| .. | .. |
|---|
| 2300 | 2346 | * Anyways, moving it for V1/V2 only as V3 doesn't need this |
|---|
| 2301 | 2347 | * at packet level. |
|---|
| 2302 | 2348 | */ |
|---|
| 2303 | | - if (po->stats.stats1.tp_drops) |
|---|
| 2349 | + if (atomic_read(&po->tp_drops)) |
|---|
| 2304 | 2350 | status |= TP_STATUS_LOSING; |
|---|
| 2305 | 2351 | } |
|---|
| 2306 | 2352 | |
|---|
| .. | .. |
|---|
| 2313 | 2359 | |
|---|
| 2314 | 2360 | skb_copy_bits(skb, 0, h.raw + macoff, snaplen); |
|---|
| 2315 | 2361 | |
|---|
| 2316 | | - if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp))) |
|---|
| 2317 | | - getnstimeofday(&ts); |
|---|
| 2362 | + /* Always timestamp; prefer an existing software timestamp taken |
|---|
| 2363 | + * closer to the time of capture. |
|---|
| 2364 | + */ |
|---|
| 2365 | + ts_status = tpacket_get_timestamp(skb, &ts, |
|---|
| 2366 | + po->tp_tstamp | SOF_TIMESTAMPING_SOFTWARE); |
|---|
| 2367 | + if (!ts_status) |
|---|
| 2368 | + ktime_get_real_ts64(&ts); |
|---|
| 2318 | 2369 | |
|---|
| 2319 | 2370 | status |= ts_status; |
|---|
| 2320 | 2371 | |
|---|
| .. | .. |
|---|
| 2370 | 2421 | sll->sll_hatype = dev->type; |
|---|
| 2371 | 2422 | sll->sll_protocol = skb->protocol; |
|---|
| 2372 | 2423 | sll->sll_pkttype = skb->pkt_type; |
|---|
| 2373 | | - if (unlikely(po->origdev)) |
|---|
| 2424 | + if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV))) |
|---|
| 2374 | 2425 | sll->sll_ifindex = orig_dev->ifindex; |
|---|
| 2375 | 2426 | else |
|---|
| 2376 | 2427 | sll->sll_ifindex = dev->ifindex; |
|---|
| .. | .. |
|---|
| 2413 | 2464 | return 0; |
|---|
| 2414 | 2465 | |
|---|
| 2415 | 2466 | drop_n_account: |
|---|
| 2416 | | - is_drop_n_account = true; |
|---|
| 2417 | | - po->stats.stats1.tp_drops++; |
|---|
| 2418 | 2467 | spin_unlock(&sk->sk_receive_queue.lock); |
|---|
| 2468 | + atomic_inc(&po->tp_drops); |
|---|
| 2469 | + is_drop_n_account = true; |
|---|
| 2419 | 2470 | |
|---|
| 2420 | 2471 | sk->sk_data_ready(sk); |
|---|
| 2421 | 2472 | kfree_skb(copy_skb); |
|---|
| .. | .. |
|---|
| 2441 | 2492 | } |
|---|
| 2442 | 2493 | |
|---|
| 2443 | 2494 | sock_wfree(skb); |
|---|
| 2444 | | -} |
|---|
| 2445 | | - |
|---|
| 2446 | | -static void tpacket_set_protocol(const struct net_device *dev, |
|---|
| 2447 | | - struct sk_buff *skb) |
|---|
| 2448 | | -{ |
|---|
| 2449 | | - if (dev->type == ARPHRD_ETHER) { |
|---|
| 2450 | | - skb_reset_mac_header(skb); |
|---|
| 2451 | | - skb->protocol = eth_hdr(skb)->h_proto; |
|---|
| 2452 | | - } |
|---|
| 2453 | 2495 | } |
|---|
| 2454 | 2496 | |
|---|
| 2455 | 2497 | static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len) |
|---|
| .. | .. |
|---|
| 2499 | 2541 | skb->priority = po->sk.sk_priority; |
|---|
| 2500 | 2542 | skb->mark = po->sk.sk_mark; |
|---|
| 2501 | 2543 | skb->tstamp = sockc->transmit_time; |
|---|
| 2502 | | - sock_tx_timestamp(&po->sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags); |
|---|
| 2544 | + skb_setup_tx_timestamp(skb, sockc->tsflags); |
|---|
| 2503 | 2545 | skb_zcopy_set_nouarg(skb, ph.raw); |
|---|
| 2504 | 2546 | |
|---|
| 2505 | 2547 | skb_reserve(skb, hlen); |
|---|
| .. | .. |
|---|
| 2522 | 2564 | return err; |
|---|
| 2523 | 2565 | if (!dev_validate_header(dev, skb->data, hdrlen)) |
|---|
| 2524 | 2566 | return -EINVAL; |
|---|
| 2525 | | - if (!skb->protocol) |
|---|
| 2526 | | - tpacket_set_protocol(dev, skb); |
|---|
| 2527 | 2567 | |
|---|
| 2528 | 2568 | data += hdrlen; |
|---|
| 2529 | 2569 | to_write -= hdrlen; |
|---|
| .. | .. |
|---|
| 2558 | 2598 | len = ((to_write > len_max) ? len_max : to_write); |
|---|
| 2559 | 2599 | } |
|---|
| 2560 | 2600 | |
|---|
| 2561 | | - skb_probe_transport_header(skb, 0); |
|---|
| 2601 | + packet_parse_headers(skb, sock); |
|---|
| 2562 | 2602 | |
|---|
| 2563 | 2603 | return tp_len; |
|---|
| 2564 | 2604 | } |
|---|
| .. | .. |
|---|
| 2788 | 2828 | packet_inc_pending(&po->tx_ring); |
|---|
| 2789 | 2829 | |
|---|
| 2790 | 2830 | status = TP_STATUS_SEND_REQUEST; |
|---|
| 2791 | | - err = po->xmit(skb); |
|---|
| 2792 | | - if (unlikely(err > 0)) { |
|---|
| 2793 | | - err = net_xmit_errno(err); |
|---|
| 2831 | + /* Paired with WRITE_ONCE() in packet_setsockopt() */ |
|---|
| 2832 | + err = READ_ONCE(po->xmit)(skb); |
|---|
| 2833 | + if (unlikely(err != 0)) { |
|---|
| 2834 | + if (err > 0) |
|---|
| 2835 | + err = net_xmit_errno(err); |
|---|
| 2794 | 2836 | if (err && __packet_get_status(po, ph) == |
|---|
| 2795 | 2837 | TP_STATUS_AVAILABLE) { |
|---|
| 2796 | 2838 | /* skb was destructed already */ |
|---|
| .. | .. |
|---|
| 2957 | 2999 | if (err) |
|---|
| 2958 | 3000 | goto out_free; |
|---|
| 2959 | 3001 | |
|---|
| 2960 | | - if (sock->type == SOCK_RAW && |
|---|
| 2961 | | - !dev_validate_header(dev, skb->data, len)) { |
|---|
| 3002 | + if ((sock->type == SOCK_RAW && |
|---|
| 3003 | + !dev_validate_header(dev, skb->data, len)) || !skb->len) { |
|---|
| 2962 | 3004 | err = -EINVAL; |
|---|
| 2963 | 3005 | goto out_free; |
|---|
| 2964 | 3006 | } |
|---|
| 2965 | 3007 | |
|---|
| 2966 | | - sock_tx_timestamp(sk, sockc.tsflags, &skb_shinfo(skb)->tx_flags); |
|---|
| 3008 | + skb_setup_tx_timestamp(skb, sockc.tsflags); |
|---|
| 2967 | 3009 | |
|---|
| 2968 | 3010 | if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) && |
|---|
| 2969 | 3011 | !packet_extra_vlan_len_allowed(dev, skb)) { |
|---|
| .. | .. |
|---|
| 2977 | 3019 | skb->mark = sockc.mark; |
|---|
| 2978 | 3020 | skb->tstamp = sockc.transmit_time; |
|---|
| 2979 | 3021 | |
|---|
| 3022 | + if (unlikely(extra_len == 4)) |
|---|
| 3023 | + skb->no_fcs = 1; |
|---|
| 3024 | + |
|---|
| 3025 | + packet_parse_headers(skb, sock); |
|---|
| 3026 | + |
|---|
| 2980 | 3027 | if (has_vnet_hdr) { |
|---|
| 2981 | 3028 | err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le()); |
|---|
| 2982 | 3029 | if (err) |
|---|
| .. | .. |
|---|
| 2985 | 3032 | virtio_net_hdr_set_proto(skb, &vnet_hdr); |
|---|
| 2986 | 3033 | } |
|---|
| 2987 | 3034 | |
|---|
| 2988 | | - skb_probe_transport_header(skb, reserve); |
|---|
| 2989 | | - |
|---|
| 2990 | | - if (unlikely(extra_len == 4)) |
|---|
| 2991 | | - skb->no_fcs = 1; |
|---|
| 2992 | | - |
|---|
| 2993 | | - err = po->xmit(skb); |
|---|
| 2994 | | - if (err > 0 && (err = net_xmit_errno(err)) != 0) |
|---|
| 2995 | | - goto out_unlock; |
|---|
| 3035 | + /* Paired with WRITE_ONCE() in packet_setsockopt() */ |
|---|
| 3036 | + err = READ_ONCE(po->xmit)(skb); |
|---|
| 3037 | + if (unlikely(err != 0)) { |
|---|
| 3038 | + if (err > 0) |
|---|
| 3039 | + err = net_xmit_errno(err); |
|---|
| 3040 | + if (err) |
|---|
| 3041 | + goto out_unlock; |
|---|
| 3042 | + } |
|---|
| 2996 | 3043 | |
|---|
| 2997 | 3044 | dev_put(dev); |
|---|
| 2998 | 3045 | |
|---|
| .. | .. |
|---|
| 3012 | 3059 | struct sock *sk = sock->sk; |
|---|
| 3013 | 3060 | struct packet_sock *po = pkt_sk(sk); |
|---|
| 3014 | 3061 | |
|---|
| 3015 | | - if (po->tx_ring.pg_vec) |
|---|
| 3062 | + /* Reading tx_ring.pg_vec without holding pg_vec_lock is racy. |
|---|
| 3063 | + * tpacket_snd() will redo the check safely. |
|---|
| 3064 | + */ |
|---|
| 3065 | + if (data_race(po->tx_ring.pg_vec)) |
|---|
| 3016 | 3066 | return tpacket_snd(po, msg); |
|---|
| 3017 | | - else |
|---|
| 3018 | | - return packet_snd(sock, msg, len); |
|---|
| 3067 | + |
|---|
| 3068 | + return packet_snd(sock, msg, len); |
|---|
| 3019 | 3069 | } |
|---|
| 3020 | 3070 | |
|---|
| 3021 | 3071 | /* |
|---|
| .. | .. |
|---|
| 3076 | 3126 | kfree(po->rollover); |
|---|
| 3077 | 3127 | if (f) { |
|---|
| 3078 | 3128 | fanout_release_data(f); |
|---|
| 3079 | | - kfree(f); |
|---|
| 3129 | + kvfree(f); |
|---|
| 3080 | 3130 | } |
|---|
| 3081 | 3131 | /* |
|---|
| 3082 | 3132 | * Now the socket is dead. No more input will appear. |
|---|
| .. | .. |
|---|
| 3111 | 3161 | |
|---|
| 3112 | 3162 | lock_sock(sk); |
|---|
| 3113 | 3163 | spin_lock(&po->bind_lock); |
|---|
| 3164 | + if (!proto) |
|---|
| 3165 | + proto = po->num; |
|---|
| 3166 | + |
|---|
| 3114 | 3167 | rcu_read_lock(); |
|---|
| 3115 | 3168 | |
|---|
| 3116 | 3169 | if (po->fanout) { |
|---|
| .. | .. |
|---|
| 3213 | 3266 | memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data)); |
|---|
| 3214 | 3267 | name[sizeof(uaddr->sa_data)] = 0; |
|---|
| 3215 | 3268 | |
|---|
| 3216 | | - return packet_do_bind(sk, name, 0, pkt_sk(sk)->num); |
|---|
| 3269 | + return packet_do_bind(sk, name, 0, 0); |
|---|
| 3217 | 3270 | } |
|---|
| 3218 | 3271 | |
|---|
| 3219 | 3272 | static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) |
|---|
| .. | .. |
|---|
| 3230 | 3283 | if (sll->sll_family != AF_PACKET) |
|---|
| 3231 | 3284 | return -EINVAL; |
|---|
| 3232 | 3285 | |
|---|
| 3233 | | - return packet_do_bind(sk, NULL, sll->sll_ifindex, |
|---|
| 3234 | | - sll->sll_protocol ? : pkt_sk(sk)->num); |
|---|
| 3286 | + return packet_do_bind(sk, NULL, sll->sll_ifindex, sll->sll_protocol); |
|---|
| 3235 | 3287 | } |
|---|
| 3236 | 3288 | |
|---|
| 3237 | 3289 | static struct proto packet_proto = { |
|---|
| .. | .. |
|---|
| 3371 | 3423 | if (skb == NULL) |
|---|
| 3372 | 3424 | goto out; |
|---|
| 3373 | 3425 | |
|---|
| 3374 | | - if (pkt_sk(sk)->pressure) |
|---|
| 3375 | | - packet_rcv_has_room(pkt_sk(sk), NULL); |
|---|
| 3426 | + packet_rcv_try_clear_pressure(pkt_sk(sk)); |
|---|
| 3376 | 3427 | |
|---|
| 3377 | 3428 | if (pkt_sk(sk)->has_vnet_hdr) { |
|---|
| 3378 | 3429 | err = packet_rcv_vnet(msg, skb, &len); |
|---|
| .. | .. |
|---|
| 3407 | 3458 | sock_recv_ts_and_drops(msg, sk, skb); |
|---|
| 3408 | 3459 | |
|---|
| 3409 | 3460 | if (msg->msg_name) { |
|---|
| 3461 | + const size_t max_len = min(sizeof(skb->cb), |
|---|
| 3462 | + sizeof(struct sockaddr_storage)); |
|---|
| 3410 | 3463 | int copy_len; |
|---|
| 3411 | 3464 | |
|---|
| 3412 | 3465 | /* If the address length field is there to be filled |
|---|
| .. | .. |
|---|
| 3429 | 3482 | msg->msg_namelen = sizeof(struct sockaddr_ll); |
|---|
| 3430 | 3483 | } |
|---|
| 3431 | 3484 | } |
|---|
| 3485 | + if (WARN_ON_ONCE(copy_len > max_len)) { |
|---|
| 3486 | + copy_len = max_len; |
|---|
| 3487 | + msg->msg_namelen = copy_len; |
|---|
| 3488 | + } |
|---|
| 3432 | 3489 | memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len); |
|---|
| 3433 | 3490 | } |
|---|
| 3434 | 3491 | |
|---|
| 3435 | | - if (pkt_sk(sk)->auxdata) { |
|---|
| 3492 | + if (packet_sock_flag(pkt_sk(sk), PACKET_SOCK_AUXDATA)) { |
|---|
| 3436 | 3493 | struct tpacket_auxdata aux; |
|---|
| 3437 | 3494 | |
|---|
| 3438 | 3495 | aux.tp_status = TP_STATUS_USER; |
|---|
| 3439 | 3496 | if (skb->ip_summed == CHECKSUM_PARTIAL) |
|---|
| 3440 | 3497 | aux.tp_status |= TP_STATUS_CSUMNOTREADY; |
|---|
| 3441 | 3498 | else if (skb->pkt_type != PACKET_OUTGOING && |
|---|
| 3442 | | - (skb->ip_summed == CHECKSUM_COMPLETE || |
|---|
| 3443 | | - skb_csum_unnecessary(skb))) |
|---|
| 3499 | + skb_csum_unnecessary(skb)) |
|---|
| 3444 | 3500 | aux.tp_status |= TP_STATUS_CSUM_VALID; |
|---|
| 3445 | 3501 | |
|---|
| 3446 | 3502 | aux.tp_len = origlen; |
|---|
| .. | .. |
|---|
| 3670 | 3726 | } |
|---|
| 3671 | 3727 | |
|---|
| 3672 | 3728 | static int |
|---|
| 3673 | | -packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) |
|---|
| 3729 | +packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, |
|---|
| 3730 | + unsigned int optlen) |
|---|
| 3674 | 3731 | { |
|---|
| 3675 | 3732 | struct sock *sk = sock->sk; |
|---|
| 3676 | 3733 | struct packet_sock *po = pkt_sk(sk); |
|---|
| .. | .. |
|---|
| 3690 | 3747 | return -EINVAL; |
|---|
| 3691 | 3748 | if (len > sizeof(mreq)) |
|---|
| 3692 | 3749 | len = sizeof(mreq); |
|---|
| 3693 | | - if (copy_from_user(&mreq, optval, len)) |
|---|
| 3750 | + if (copy_from_sockptr(&mreq, optval, len)) |
|---|
| 3694 | 3751 | return -EFAULT; |
|---|
| 3695 | 3752 | if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address))) |
|---|
| 3696 | 3753 | return -EINVAL; |
|---|
| .. | .. |
|---|
| 3721 | 3778 | if (optlen < len) { |
|---|
| 3722 | 3779 | ret = -EINVAL; |
|---|
| 3723 | 3780 | } else { |
|---|
| 3724 | | - if (copy_from_user(&req_u.req, optval, len)) |
|---|
| 3781 | + if (copy_from_sockptr(&req_u.req, optval, len)) |
|---|
| 3725 | 3782 | ret = -EFAULT; |
|---|
| 3726 | 3783 | else |
|---|
| 3727 | 3784 | ret = packet_set_ring(sk, &req_u, 0, |
|---|
| .. | .. |
|---|
| 3736 | 3793 | |
|---|
| 3737 | 3794 | if (optlen != sizeof(val)) |
|---|
| 3738 | 3795 | return -EINVAL; |
|---|
| 3739 | | - if (copy_from_user(&val, optval, sizeof(val))) |
|---|
| 3796 | + if (copy_from_sockptr(&val, optval, sizeof(val))) |
|---|
| 3740 | 3797 | return -EFAULT; |
|---|
| 3741 | 3798 | |
|---|
| 3742 | 3799 | pkt_sk(sk)->copy_thresh = val; |
|---|
| .. | .. |
|---|
| 3748 | 3805 | |
|---|
| 3749 | 3806 | if (optlen != sizeof(val)) |
|---|
| 3750 | 3807 | return -EINVAL; |
|---|
| 3751 | | - if (copy_from_user(&val, optval, sizeof(val))) |
|---|
| 3808 | + if (copy_from_sockptr(&val, optval, sizeof(val))) |
|---|
| 3752 | 3809 | return -EFAULT; |
|---|
| 3753 | 3810 | switch (val) { |
|---|
| 3754 | 3811 | case TPACKET_V1: |
|---|
| .. | .. |
|---|
| 3774 | 3831 | |
|---|
| 3775 | 3832 | if (optlen != sizeof(val)) |
|---|
| 3776 | 3833 | return -EINVAL; |
|---|
| 3777 | | - if (copy_from_user(&val, optval, sizeof(val))) |
|---|
| 3834 | + if (copy_from_sockptr(&val, optval, sizeof(val))) |
|---|
| 3778 | 3835 | return -EFAULT; |
|---|
| 3779 | 3836 | if (val > INT_MAX) |
|---|
| 3780 | 3837 | return -EINVAL; |
|---|
| .. | .. |
|---|
| 3794 | 3851 | |
|---|
| 3795 | 3852 | if (optlen != sizeof(val)) |
|---|
| 3796 | 3853 | return -EINVAL; |
|---|
| 3797 | | - if (copy_from_user(&val, optval, sizeof(val))) |
|---|
| 3854 | + if (copy_from_sockptr(&val, optval, sizeof(val))) |
|---|
| 3798 | 3855 | return -EFAULT; |
|---|
| 3799 | 3856 | |
|---|
| 3800 | 3857 | lock_sock(sk); |
|---|
| .. | .. |
|---|
| 3813 | 3870 | |
|---|
| 3814 | 3871 | if (optlen < sizeof(val)) |
|---|
| 3815 | 3872 | return -EINVAL; |
|---|
| 3816 | | - if (copy_from_user(&val, optval, sizeof(val))) |
|---|
| 3873 | + if (copy_from_sockptr(&val, optval, sizeof(val))) |
|---|
| 3817 | 3874 | return -EFAULT; |
|---|
| 3818 | 3875 | |
|---|
| 3819 | | - lock_sock(sk); |
|---|
| 3820 | | - po->auxdata = !!val; |
|---|
| 3821 | | - release_sock(sk); |
|---|
| 3876 | + packet_sock_flag_set(po, PACKET_SOCK_AUXDATA, val); |
|---|
| 3822 | 3877 | return 0; |
|---|
| 3823 | 3878 | } |
|---|
| 3824 | 3879 | case PACKET_ORIGDEV: |
|---|
| .. | .. |
|---|
| 3827 | 3882 | |
|---|
| 3828 | 3883 | if (optlen < sizeof(val)) |
|---|
| 3829 | 3884 | return -EINVAL; |
|---|
| 3830 | | - if (copy_from_user(&val, optval, sizeof(val))) |
|---|
| 3885 | + if (copy_from_sockptr(&val, optval, sizeof(val))) |
|---|
| 3831 | 3886 | return -EFAULT; |
|---|
| 3832 | 3887 | |
|---|
| 3833 | | - lock_sock(sk); |
|---|
| 3834 | | - po->origdev = !!val; |
|---|
| 3835 | | - release_sock(sk); |
|---|
| 3888 | + packet_sock_flag_set(po, PACKET_SOCK_ORIGDEV, val); |
|---|
| 3836 | 3889 | return 0; |
|---|
| 3837 | 3890 | } |
|---|
| 3838 | 3891 | case PACKET_VNET_HDR: |
|---|
| .. | .. |
|---|
| 3843 | 3896 | return -EINVAL; |
|---|
| 3844 | 3897 | if (optlen < sizeof(val)) |
|---|
| 3845 | 3898 | return -EINVAL; |
|---|
| 3846 | | - if (copy_from_user(&val, optval, sizeof(val))) |
|---|
| 3899 | + if (copy_from_sockptr(&val, optval, sizeof(val))) |
|---|
| 3847 | 3900 | return -EFAULT; |
|---|
| 3848 | 3901 | |
|---|
| 3849 | 3902 | lock_sock(sk); |
|---|
| .. | .. |
|---|
| 3862 | 3915 | |
|---|
| 3863 | 3916 | if (optlen != sizeof(val)) |
|---|
| 3864 | 3917 | return -EINVAL; |
|---|
| 3865 | | - if (copy_from_user(&val, optval, sizeof(val))) |
|---|
| 3918 | + if (copy_from_sockptr(&val, optval, sizeof(val))) |
|---|
| 3866 | 3919 | return -EFAULT; |
|---|
| 3867 | 3920 | |
|---|
| 3868 | 3921 | po->tp_tstamp = val; |
|---|
| .. | .. |
|---|
| 3870 | 3923 | } |
|---|
| 3871 | 3924 | case PACKET_FANOUT: |
|---|
| 3872 | 3925 | { |
|---|
| 3873 | | - int val; |
|---|
| 3926 | + struct fanout_args args = { 0 }; |
|---|
| 3874 | 3927 | |
|---|
| 3875 | | - if (optlen != sizeof(val)) |
|---|
| 3928 | + if (optlen != sizeof(int) && optlen != sizeof(args)) |
|---|
| 3876 | 3929 | return -EINVAL; |
|---|
| 3877 | | - if (copy_from_user(&val, optval, sizeof(val))) |
|---|
| 3930 | + if (copy_from_sockptr(&args, optval, optlen)) |
|---|
| 3878 | 3931 | return -EFAULT; |
|---|
| 3879 | 3932 | |
|---|
| 3880 | | - return fanout_add(sk, val & 0xffff, val >> 16); |
|---|
| 3933 | + return fanout_add(sk, &args); |
|---|
| 3881 | 3934 | } |
|---|
| 3882 | 3935 | case PACKET_FANOUT_DATA: |
|---|
| 3883 | 3936 | { |
|---|
| .. | .. |
|---|
| 3887 | 3940 | |
|---|
| 3888 | 3941 | return fanout_set_data(po, optval, optlen); |
|---|
| 3889 | 3942 | } |
|---|
| 3943 | + case PACKET_IGNORE_OUTGOING: |
|---|
| 3944 | + { |
|---|
| 3945 | + int val; |
|---|
| 3946 | + |
|---|
| 3947 | + if (optlen != sizeof(val)) |
|---|
| 3948 | + return -EINVAL; |
|---|
| 3949 | + if (copy_from_sockptr(&val, optval, sizeof(val))) |
|---|
| 3950 | + return -EFAULT; |
|---|
| 3951 | + if (val < 0 || val > 1) |
|---|
| 3952 | + return -EINVAL; |
|---|
| 3953 | + |
|---|
| 3954 | + po->prot_hook.ignore_outgoing = !!val; |
|---|
| 3955 | + return 0; |
|---|
| 3956 | + } |
|---|
| 3890 | 3957 | case PACKET_TX_HAS_OFF: |
|---|
| 3891 | 3958 | { |
|---|
| 3892 | 3959 | unsigned int val; |
|---|
| 3893 | 3960 | |
|---|
| 3894 | 3961 | if (optlen != sizeof(val)) |
|---|
| 3895 | 3962 | return -EINVAL; |
|---|
| 3896 | | - if (copy_from_user(&val, optval, sizeof(val))) |
|---|
| 3963 | + if (copy_from_sockptr(&val, optval, sizeof(val))) |
|---|
| 3897 | 3964 | return -EFAULT; |
|---|
| 3898 | 3965 | |
|---|
| 3899 | 3966 | lock_sock(sk); |
|---|
| .. | .. |
|---|
| 3912 | 3979 | |
|---|
| 3913 | 3980 | if (optlen != sizeof(val)) |
|---|
| 3914 | 3981 | return -EINVAL; |
|---|
| 3915 | | - if (copy_from_user(&val, optval, sizeof(val))) |
|---|
| 3982 | + if (copy_from_sockptr(&val, optval, sizeof(val))) |
|---|
| 3916 | 3983 | return -EFAULT; |
|---|
| 3917 | 3984 | |
|---|
| 3918 | | - po->xmit = val ? packet_direct_xmit : dev_queue_xmit; |
|---|
| 3985 | + /* Paired with all lockless reads of po->xmit */ |
|---|
| 3986 | + WRITE_ONCE(po->xmit, val ? packet_direct_xmit : dev_queue_xmit); |
|---|
| 3919 | 3987 | return 0; |
|---|
| 3920 | 3988 | } |
|---|
| 3921 | 3989 | default: |
|---|
| .. | .. |
|---|
| 3933 | 4001 | void *data = &val; |
|---|
| 3934 | 4002 | union tpacket_stats_u st; |
|---|
| 3935 | 4003 | struct tpacket_rollover_stats rstats; |
|---|
| 4004 | + int drops; |
|---|
| 3936 | 4005 | |
|---|
| 3937 | 4006 | if (level != SOL_PACKET) |
|---|
| 3938 | 4007 | return -ENOPROTOOPT; |
|---|
| .. | .. |
|---|
| 3949 | 4018 | memcpy(&st, &po->stats, sizeof(st)); |
|---|
| 3950 | 4019 | memset(&po->stats, 0, sizeof(po->stats)); |
|---|
| 3951 | 4020 | spin_unlock_bh(&sk->sk_receive_queue.lock); |
|---|
| 4021 | + drops = atomic_xchg(&po->tp_drops, 0); |
|---|
| 3952 | 4022 | |
|---|
| 3953 | 4023 | if (po->tp_version == TPACKET_V3) { |
|---|
| 3954 | 4024 | lv = sizeof(struct tpacket_stats_v3); |
|---|
| 3955 | | - st.stats3.tp_packets += st.stats3.tp_drops; |
|---|
| 4025 | + st.stats3.tp_drops = drops; |
|---|
| 4026 | + st.stats3.tp_packets += drops; |
|---|
| 3956 | 4027 | data = &st.stats3; |
|---|
| 3957 | 4028 | } else { |
|---|
| 3958 | 4029 | lv = sizeof(struct tpacket_stats); |
|---|
| 3959 | | - st.stats1.tp_packets += st.stats1.tp_drops; |
|---|
| 4030 | + st.stats1.tp_drops = drops; |
|---|
| 4031 | + st.stats1.tp_packets += drops; |
|---|
| 3960 | 4032 | data = &st.stats1; |
|---|
| 3961 | 4033 | } |
|---|
| 3962 | 4034 | |
|---|
| 3963 | 4035 | break; |
|---|
| 3964 | 4036 | case PACKET_AUXDATA: |
|---|
| 3965 | | - val = po->auxdata; |
|---|
| 4037 | + val = packet_sock_flag(po, PACKET_SOCK_AUXDATA); |
|---|
| 3966 | 4038 | break; |
|---|
| 3967 | 4039 | case PACKET_ORIGDEV: |
|---|
| 3968 | | - val = po->origdev; |
|---|
| 4040 | + val = packet_sock_flag(po, PACKET_SOCK_ORIGDEV); |
|---|
| 3969 | 4041 | break; |
|---|
| 3970 | 4042 | case PACKET_VNET_HDR: |
|---|
| 3971 | 4043 | val = po->has_vnet_hdr; |
|---|
| .. | .. |
|---|
| 4010 | 4082 | ((u32)po->fanout->flags << 24)) : |
|---|
| 4011 | 4083 | 0); |
|---|
| 4012 | 4084 | break; |
|---|
| 4085 | + case PACKET_IGNORE_OUTGOING: |
|---|
| 4086 | + val = po->prot_hook.ignore_outgoing; |
|---|
| 4087 | + break; |
|---|
| 4013 | 4088 | case PACKET_ROLLOVER_STATS: |
|---|
| 4014 | 4089 | if (!po->rollover) |
|---|
| 4015 | 4090 | return -EINVAL; |
|---|
| .. | .. |
|---|
| 4038 | 4113 | return 0; |
|---|
| 4039 | 4114 | } |
|---|
| 4040 | 4115 | |
|---|
| 4041 | | - |
|---|
| 4042 | | -#ifdef CONFIG_COMPAT |
|---|
| 4043 | | -static int compat_packet_setsockopt(struct socket *sock, int level, int optname, |
|---|
| 4044 | | - char __user *optval, unsigned int optlen) |
|---|
| 4045 | | -{ |
|---|
| 4046 | | - struct packet_sock *po = pkt_sk(sock->sk); |
|---|
| 4047 | | - |
|---|
| 4048 | | - if (level != SOL_PACKET) |
|---|
| 4049 | | - return -ENOPROTOOPT; |
|---|
| 4050 | | - |
|---|
| 4051 | | - if (optname == PACKET_FANOUT_DATA && |
|---|
| 4052 | | - po->fanout && po->fanout->type == PACKET_FANOUT_CBPF) { |
|---|
| 4053 | | - optval = (char __user *)get_compat_bpf_fprog(optval); |
|---|
| 4054 | | - if (!optval) |
|---|
| 4055 | | - return -EFAULT; |
|---|
| 4056 | | - optlen = sizeof(struct sock_fprog); |
|---|
| 4057 | | - } |
|---|
| 4058 | | - |
|---|
| 4059 | | - return packet_setsockopt(sock, level, optname, optval, optlen); |
|---|
| 4060 | | -} |
|---|
| 4061 | | -#endif |
|---|
| 4062 | | - |
|---|
| 4063 | 4116 | static int packet_notifier(struct notifier_block *this, |
|---|
| 4064 | 4117 | unsigned long msg, void *ptr) |
|---|
| 4065 | 4118 | { |
|---|
| .. | .. |
|---|
| 4075 | 4128 | case NETDEV_UNREGISTER: |
|---|
| 4076 | 4129 | if (po->mclist) |
|---|
| 4077 | 4130 | packet_dev_mclist_delete(dev, &po->mclist); |
|---|
| 4078 | | - /* fallthrough */ |
|---|
| 4131 | + fallthrough; |
|---|
| 4079 | 4132 | |
|---|
| 4080 | 4133 | case NETDEV_DOWN: |
|---|
| 4081 | 4134 | if (dev->ifindex == po->ifindex) { |
|---|
| .. | .. |
|---|
| 4135 | 4188 | spin_unlock_bh(&sk->sk_receive_queue.lock); |
|---|
| 4136 | 4189 | return put_user(amount, (int __user *)arg); |
|---|
| 4137 | 4190 | } |
|---|
| 4138 | | - case SIOCGSTAMP: |
|---|
| 4139 | | - return sock_get_timestamp(sk, (struct timeval __user *)arg); |
|---|
| 4140 | | - case SIOCGSTAMPNS: |
|---|
| 4141 | | - return sock_get_timestampns(sk, (struct timespec __user *)arg); |
|---|
| 4142 | | - |
|---|
| 4143 | 4191 | #ifdef CONFIG_INET |
|---|
| 4144 | 4192 | case SIOCADDRT: |
|---|
| 4145 | 4193 | case SIOCDELRT: |
|---|
| .. | .. |
|---|
| 4177 | 4225 | TP_STATUS_KERNEL)) |
|---|
| 4178 | 4226 | mask |= EPOLLIN | EPOLLRDNORM; |
|---|
| 4179 | 4227 | } |
|---|
| 4180 | | - if (po->pressure && __packet_rcv_has_room(po, NULL) == ROOM_NORMAL) |
|---|
| 4181 | | - po->pressure = 0; |
|---|
| 4228 | + packet_rcv_try_clear_pressure(po); |
|---|
| 4182 | 4229 | spin_unlock_bh(&sk->sk_receive_queue.lock); |
|---|
| 4183 | 4230 | spin_lock_bh(&sk->sk_write_queue.lock); |
|---|
| 4184 | 4231 | if (po->tx_ring.pg_vec) { |
|---|
| .. | .. |
|---|
| 4297 | 4344 | struct packet_ring_buffer *rb; |
|---|
| 4298 | 4345 | struct sk_buff_head *rb_queue; |
|---|
| 4299 | 4346 | __be16 num; |
|---|
| 4300 | | - int err = -EINVAL; |
|---|
| 4347 | + int err; |
|---|
| 4301 | 4348 | /* Added to avoid minimal code churn */ |
|---|
| 4302 | 4349 | struct tpacket_req *req = &req_u->req; |
|---|
| 4303 | 4350 | |
|---|
| .. | .. |
|---|
| 4527 | 4574 | .getname = packet_getname_spkt, |
|---|
| 4528 | 4575 | .poll = datagram_poll, |
|---|
| 4529 | 4576 | .ioctl = packet_ioctl, |
|---|
| 4577 | + .gettstamp = sock_gettstamp, |
|---|
| 4530 | 4578 | .listen = sock_no_listen, |
|---|
| 4531 | 4579 | .shutdown = sock_no_shutdown, |
|---|
| 4532 | | - .setsockopt = sock_no_setsockopt, |
|---|
| 4533 | | - .getsockopt = sock_no_getsockopt, |
|---|
| 4534 | 4580 | .sendmsg = packet_sendmsg_spkt, |
|---|
| 4535 | 4581 | .recvmsg = packet_recvmsg, |
|---|
| 4536 | 4582 | .mmap = sock_no_mmap, |
|---|
| .. | .. |
|---|
| 4548 | 4594 | .getname = packet_getname, |
|---|
| 4549 | 4595 | .poll = packet_poll, |
|---|
| 4550 | 4596 | .ioctl = packet_ioctl, |
|---|
| 4597 | + .gettstamp = sock_gettstamp, |
|---|
| 4551 | 4598 | .listen = sock_no_listen, |
|---|
| 4552 | 4599 | .shutdown = sock_no_shutdown, |
|---|
| 4553 | 4600 | .setsockopt = packet_setsockopt, |
|---|
| 4554 | 4601 | .getsockopt = packet_getsockopt, |
|---|
| 4555 | | -#ifdef CONFIG_COMPAT |
|---|
| 4556 | | - .compat_setsockopt = compat_packet_setsockopt, |
|---|
| 4557 | | -#endif |
|---|
| 4558 | 4602 | .sendmsg = packet_sendmsg, |
|---|
| 4559 | 4603 | .recvmsg = packet_recvmsg, |
|---|
| 4560 | 4604 | .mmap = packet_mmap, |
|---|
| .. | .. |
|---|
| 4631 | 4675 | mutex_init(&net->packet.sklist_lock); |
|---|
| 4632 | 4676 | INIT_HLIST_HEAD(&net->packet.sklist); |
|---|
| 4633 | 4677 | |
|---|
| 4678 | +#ifdef CONFIG_PROC_FS |
|---|
| 4634 | 4679 | if (!proc_create_net("packet", 0, net->proc_net, &packet_seq_ops, |
|---|
| 4635 | 4680 | sizeof(struct seq_net_private))) |
|---|
| 4636 | 4681 | return -ENOMEM; |
|---|
| 4682 | +#endif /* CONFIG_PROC_FS */ |
|---|
| 4637 | 4683 | |
|---|
| 4638 | 4684 | return 0; |
|---|
| 4639 | 4685 | } |
|---|