| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-or-later |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * INET An implementation of the TCP/IP protocol suite for the LINUX |
|---|
| 3 | 4 | * operating system. INET is implemented using the BSD Socket |
|---|
| .. | .. |
|---|
| 5 | 6 | * |
|---|
| 6 | 7 | * Generic socket support routines. Memory allocators, socket lock/release |
|---|
| 7 | 8 | * handler for protocols to use and generic option handler. |
|---|
| 8 | | - * |
|---|
| 9 | 9 | * |
|---|
| 10 | 10 | * Authors: Ross Biro |
|---|
| 11 | 11 | * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> |
|---|
| .. | .. |
|---|
| 81 | 81 | * Arnaldo C. Melo : cleanups, use skb_queue_purge |
|---|
| 82 | 82 | * |
|---|
| 83 | 83 | * To Fix: |
|---|
| 84 | | - * |
|---|
| 85 | | - * |
|---|
| 86 | | - * This program is free software; you can redistribute it and/or |
|---|
| 87 | | - * modify it under the terms of the GNU General Public License |
|---|
| 88 | | - * as published by the Free Software Foundation; either version |
|---|
| 89 | | - * 2 of the License, or (at your option) any later version. |
|---|
| 90 | 84 | */ |
|---|
| 91 | 85 | |
|---|
| 92 | 86 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
|---|
| .. | .. |
|---|
| 119 | 113 | #include <linux/static_key.h> |
|---|
| 120 | 114 | #include <linux/memcontrol.h> |
|---|
| 121 | 115 | #include <linux/prefetch.h> |
|---|
| 116 | +#include <linux/compat.h> |
|---|
| 122 | 117 | |
|---|
| 123 | 118 | #include <linux/uaccess.h> |
|---|
| 124 | 119 | |
|---|
| .. | .. |
|---|
| 137 | 132 | |
|---|
| 138 | 133 | #include <linux/filter.h> |
|---|
| 139 | 134 | #include <net/sock_reuseport.h> |
|---|
| 135 | +#include <net/bpf_sk_storage.h> |
|---|
| 140 | 136 | |
|---|
| 141 | 137 | #include <trace/events/sock.h> |
|---|
| 138 | +#include <trace/hooks/sched.h> |
|---|
| 142 | 139 | |
|---|
| 143 | 140 | #include <net/tcp.h> |
|---|
| 144 | 141 | #include <net/busy_poll.h> |
|---|
| .. | .. |
|---|
| 335 | 332 | } |
|---|
| 336 | 333 | EXPORT_SYMBOL(__sk_backlog_rcv); |
|---|
| 337 | 334 | |
|---|
| 338 | | -static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) |
|---|
| 335 | +static int sock_get_timeout(long timeo, void *optval, bool old_timeval) |
|---|
| 339 | 336 | { |
|---|
| 340 | | - struct timeval tv; |
|---|
| 337 | + struct __kernel_sock_timeval tv; |
|---|
| 341 | 338 | |
|---|
| 342 | | - if (optlen < sizeof(tv)) |
|---|
| 343 | | - return -EINVAL; |
|---|
| 344 | | - if (copy_from_user(&tv, optval, sizeof(tv))) |
|---|
| 345 | | - return -EFAULT; |
|---|
| 339 | + if (timeo == MAX_SCHEDULE_TIMEOUT) { |
|---|
| 340 | + tv.tv_sec = 0; |
|---|
| 341 | + tv.tv_usec = 0; |
|---|
| 342 | + } else { |
|---|
| 343 | + tv.tv_sec = timeo / HZ; |
|---|
| 344 | + tv.tv_usec = ((timeo % HZ) * USEC_PER_SEC) / HZ; |
|---|
| 345 | + } |
|---|
| 346 | + |
|---|
| 347 | + if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) { |
|---|
| 348 | + struct old_timeval32 tv32 = { tv.tv_sec, tv.tv_usec }; |
|---|
| 349 | + *(struct old_timeval32 *)optval = tv32; |
|---|
| 350 | + return sizeof(tv32); |
|---|
| 351 | + } |
|---|
| 352 | + |
|---|
| 353 | + if (old_timeval) { |
|---|
| 354 | + struct __kernel_old_timeval old_tv; |
|---|
| 355 | + old_tv.tv_sec = tv.tv_sec; |
|---|
| 356 | + old_tv.tv_usec = tv.tv_usec; |
|---|
| 357 | + *(struct __kernel_old_timeval *)optval = old_tv; |
|---|
| 358 | + return sizeof(old_tv); |
|---|
| 359 | + } |
|---|
| 360 | + |
|---|
| 361 | + *(struct __kernel_sock_timeval *)optval = tv; |
|---|
| 362 | + return sizeof(tv); |
|---|
| 363 | +} |
|---|
| 364 | + |
|---|
| 365 | +static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen, |
|---|
| 366 | + bool old_timeval) |
|---|
| 367 | +{ |
|---|
| 368 | + struct __kernel_sock_timeval tv; |
|---|
| 369 | + |
|---|
| 370 | + if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) { |
|---|
| 371 | + struct old_timeval32 tv32; |
|---|
| 372 | + |
|---|
| 373 | + if (optlen < sizeof(tv32)) |
|---|
| 374 | + return -EINVAL; |
|---|
| 375 | + |
|---|
| 376 | + if (copy_from_sockptr(&tv32, optval, sizeof(tv32))) |
|---|
| 377 | + return -EFAULT; |
|---|
| 378 | + tv.tv_sec = tv32.tv_sec; |
|---|
| 379 | + tv.tv_usec = tv32.tv_usec; |
|---|
| 380 | + } else if (old_timeval) { |
|---|
| 381 | + struct __kernel_old_timeval old_tv; |
|---|
| 382 | + |
|---|
| 383 | + if (optlen < sizeof(old_tv)) |
|---|
| 384 | + return -EINVAL; |
|---|
| 385 | + if (copy_from_sockptr(&old_tv, optval, sizeof(old_tv))) |
|---|
| 386 | + return -EFAULT; |
|---|
| 387 | + tv.tv_sec = old_tv.tv_sec; |
|---|
| 388 | + tv.tv_usec = old_tv.tv_usec; |
|---|
| 389 | + } else { |
|---|
| 390 | + if (optlen < sizeof(tv)) |
|---|
| 391 | + return -EINVAL; |
|---|
| 392 | + if (copy_from_sockptr(&tv, optval, sizeof(tv))) |
|---|
| 393 | + return -EFAULT; |
|---|
| 394 | + } |
|---|
| 346 | 395 | if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC) |
|---|
| 347 | 396 | return -EDOM; |
|---|
| 348 | 397 | |
|---|
| .. | .. |
|---|
| 360 | 409 | *timeo_p = MAX_SCHEDULE_TIMEOUT; |
|---|
| 361 | 410 | if (tv.tv_sec == 0 && tv.tv_usec == 0) |
|---|
| 362 | 411 | return 0; |
|---|
| 363 | | - if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1)) |
|---|
| 364 | | - *timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP(tv.tv_usec, USEC_PER_SEC / HZ); |
|---|
| 412 | + if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)) |
|---|
| 413 | + *timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP((unsigned long)tv.tv_usec, USEC_PER_SEC / HZ); |
|---|
| 365 | 414 | return 0; |
|---|
| 366 | | -} |
|---|
| 367 | | - |
|---|
| 368 | | -static void sock_warn_obsolete_bsdism(const char *name) |
|---|
| 369 | | -{ |
|---|
| 370 | | - static int warned; |
|---|
| 371 | | - static char warncomm[TASK_COMM_LEN]; |
|---|
| 372 | | - if (strcmp(warncomm, current->comm) && warned < 5) { |
|---|
| 373 | | - strcpy(warncomm, current->comm); |
|---|
| 374 | | - pr_warn("process `%s' is using obsolete %s SO_BSDCOMPAT\n", |
|---|
| 375 | | - warncomm, name); |
|---|
| 376 | | - warned++; |
|---|
| 377 | | - } |
|---|
| 378 | 415 | } |
|---|
| 379 | 416 | |
|---|
| 380 | 417 | static bool sock_needs_netstamp(const struct sock *sk) |
|---|
| .. | .. |
|---|
| 472 | 509 | |
|---|
| 473 | 510 | rc = sk_backlog_rcv(sk, skb); |
|---|
| 474 | 511 | |
|---|
| 475 | | - mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); |
|---|
| 476 | | - } else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) { |
|---|
| 512 | + mutex_release(&sk->sk_lock.dep_map, _RET_IP_); |
|---|
| 513 | + } else if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) { |
|---|
| 477 | 514 | bh_unlock_sock(sk); |
|---|
| 478 | 515 | atomic_inc(&sk->sk_drops); |
|---|
| 479 | 516 | goto discard_and_relse; |
|---|
| .. | .. |
|---|
| 520 | 557 | } |
|---|
| 521 | 558 | EXPORT_SYMBOL(sk_dst_check); |
|---|
| 522 | 559 | |
|---|
| 523 | | -static int sock_setbindtodevice(struct sock *sk, char __user *optval, |
|---|
| 524 | | - int optlen) |
|---|
| 560 | +static int sock_bindtoindex_locked(struct sock *sk, int ifindex) |
|---|
| 561 | +{ |
|---|
| 562 | + int ret = -ENOPROTOOPT; |
|---|
| 563 | +#ifdef CONFIG_NETDEVICES |
|---|
| 564 | + struct net *net = sock_net(sk); |
|---|
| 565 | + |
|---|
| 566 | + /* Sorry... */ |
|---|
| 567 | + ret = -EPERM; |
|---|
| 568 | + if (sk->sk_bound_dev_if && !ns_capable(net->user_ns, CAP_NET_RAW)) |
|---|
| 569 | + goto out; |
|---|
| 570 | + |
|---|
| 571 | + ret = -EINVAL; |
|---|
| 572 | + if (ifindex < 0) |
|---|
| 573 | + goto out; |
|---|
| 574 | + |
|---|
| 575 | + sk->sk_bound_dev_if = ifindex; |
|---|
| 576 | + if (sk->sk_prot->rehash) |
|---|
| 577 | + sk->sk_prot->rehash(sk); |
|---|
| 578 | + sk_dst_reset(sk); |
|---|
| 579 | + |
|---|
| 580 | + ret = 0; |
|---|
| 581 | + |
|---|
| 582 | +out: |
|---|
| 583 | +#endif |
|---|
| 584 | + |
|---|
| 585 | + return ret; |
|---|
| 586 | +} |
|---|
| 587 | + |
|---|
| 588 | +int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk) |
|---|
| 589 | +{ |
|---|
| 590 | + int ret; |
|---|
| 591 | + |
|---|
| 592 | + if (lock_sk) |
|---|
| 593 | + lock_sock(sk); |
|---|
| 594 | + ret = sock_bindtoindex_locked(sk, ifindex); |
|---|
| 595 | + if (lock_sk) |
|---|
| 596 | + release_sock(sk); |
|---|
| 597 | + |
|---|
| 598 | + return ret; |
|---|
| 599 | +} |
|---|
| 600 | +EXPORT_SYMBOL(sock_bindtoindex); |
|---|
| 601 | + |
|---|
| 602 | +static int sock_setbindtodevice(struct sock *sk, sockptr_t optval, int optlen) |
|---|
| 525 | 603 | { |
|---|
| 526 | 604 | int ret = -ENOPROTOOPT; |
|---|
| 527 | 605 | #ifdef CONFIG_NETDEVICES |
|---|
| 528 | 606 | struct net *net = sock_net(sk); |
|---|
| 529 | 607 | char devname[IFNAMSIZ]; |
|---|
| 530 | 608 | int index; |
|---|
| 531 | | - |
|---|
| 532 | | - /* Sorry... */ |
|---|
| 533 | | - ret = -EPERM; |
|---|
| 534 | | - if (!ns_capable(net->user_ns, CAP_NET_RAW)) |
|---|
| 535 | | - goto out; |
|---|
| 536 | 609 | |
|---|
| 537 | 610 | ret = -EINVAL; |
|---|
| 538 | 611 | if (optlen < 0) |
|---|
| .. | .. |
|---|
| 548 | 621 | memset(devname, 0, sizeof(devname)); |
|---|
| 549 | 622 | |
|---|
| 550 | 623 | ret = -EFAULT; |
|---|
| 551 | | - if (copy_from_user(devname, optval, optlen)) |
|---|
| 624 | + if (copy_from_sockptr(devname, optval, optlen)) |
|---|
| 552 | 625 | goto out; |
|---|
| 553 | 626 | |
|---|
| 554 | 627 | index = 0; |
|---|
| .. | .. |
|---|
| 565 | 638 | goto out; |
|---|
| 566 | 639 | } |
|---|
| 567 | 640 | |
|---|
| 568 | | - lock_sock(sk); |
|---|
| 569 | | - sk->sk_bound_dev_if = index; |
|---|
| 570 | | - sk_dst_reset(sk); |
|---|
| 571 | | - release_sock(sk); |
|---|
| 572 | | - |
|---|
| 573 | | - ret = 0; |
|---|
| 574 | | - |
|---|
| 641 | + return sock_bindtoindex(sk, index, true); |
|---|
| 575 | 642 | out: |
|---|
| 576 | 643 | #endif |
|---|
| 577 | 644 | |
|---|
| .. | .. |
|---|
| 618 | 685 | return ret; |
|---|
| 619 | 686 | } |
|---|
| 620 | 687 | |
|---|
| 621 | | -static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool) |
|---|
| 622 | | -{ |
|---|
| 623 | | - if (valbool) |
|---|
| 624 | | - sock_set_flag(sk, bit); |
|---|
| 625 | | - else |
|---|
| 626 | | - sock_reset_flag(sk, bit); |
|---|
| 627 | | -} |
|---|
| 628 | | - |
|---|
| 629 | 688 | bool sk_mc_loop(struct sock *sk) |
|---|
| 630 | 689 | { |
|---|
| 631 | 690 | if (dev_recursion_level()) |
|---|
| 632 | 691 | return false; |
|---|
| 633 | 692 | if (!sk) |
|---|
| 634 | 693 | return true; |
|---|
| 635 | | - switch (sk->sk_family) { |
|---|
| 694 | + /* IPV6_ADDRFORM can change sk->sk_family under us. */ |
|---|
| 695 | + switch (READ_ONCE(sk->sk_family)) { |
|---|
| 636 | 696 | case AF_INET: |
|---|
| 637 | 697 | return inet_sk(sk)->mc_loop; |
|---|
| 638 | 698 | #if IS_ENABLED(CONFIG_IPV6) |
|---|
| .. | .. |
|---|
| 645 | 705 | } |
|---|
| 646 | 706 | EXPORT_SYMBOL(sk_mc_loop); |
|---|
| 647 | 707 | |
|---|
| 708 | +void sock_set_reuseaddr(struct sock *sk) |
|---|
| 709 | +{ |
|---|
| 710 | + lock_sock(sk); |
|---|
| 711 | + sk->sk_reuse = SK_CAN_REUSE; |
|---|
| 712 | + release_sock(sk); |
|---|
| 713 | +} |
|---|
| 714 | +EXPORT_SYMBOL(sock_set_reuseaddr); |
|---|
| 715 | + |
|---|
| 716 | +void sock_set_reuseport(struct sock *sk) |
|---|
| 717 | +{ |
|---|
| 718 | + lock_sock(sk); |
|---|
| 719 | + sk->sk_reuseport = true; |
|---|
| 720 | + release_sock(sk); |
|---|
| 721 | +} |
|---|
| 722 | +EXPORT_SYMBOL(sock_set_reuseport); |
|---|
| 723 | + |
|---|
| 724 | +void sock_no_linger(struct sock *sk) |
|---|
| 725 | +{ |
|---|
| 726 | + lock_sock(sk); |
|---|
| 727 | + sk->sk_lingertime = 0; |
|---|
| 728 | + sock_set_flag(sk, SOCK_LINGER); |
|---|
| 729 | + release_sock(sk); |
|---|
| 730 | +} |
|---|
| 731 | +EXPORT_SYMBOL(sock_no_linger); |
|---|
| 732 | + |
|---|
| 733 | +void sock_set_priority(struct sock *sk, u32 priority) |
|---|
| 734 | +{ |
|---|
| 735 | + lock_sock(sk); |
|---|
| 736 | + sk->sk_priority = priority; |
|---|
| 737 | + release_sock(sk); |
|---|
| 738 | +} |
|---|
| 739 | +EXPORT_SYMBOL(sock_set_priority); |
|---|
| 740 | + |
|---|
| 741 | +void sock_set_sndtimeo(struct sock *sk, s64 secs) |
|---|
| 742 | +{ |
|---|
| 743 | + lock_sock(sk); |
|---|
| 744 | + if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1) |
|---|
| 745 | + sk->sk_sndtimeo = secs * HZ; |
|---|
| 746 | + else |
|---|
| 747 | + sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; |
|---|
| 748 | + release_sock(sk); |
|---|
| 749 | +} |
|---|
| 750 | +EXPORT_SYMBOL(sock_set_sndtimeo); |
|---|
| 751 | + |
|---|
| 752 | +static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns) |
|---|
| 753 | +{ |
|---|
| 754 | + if (val) { |
|---|
| 755 | + sock_valbool_flag(sk, SOCK_TSTAMP_NEW, new); |
|---|
| 756 | + sock_valbool_flag(sk, SOCK_RCVTSTAMPNS, ns); |
|---|
| 757 | + sock_set_flag(sk, SOCK_RCVTSTAMP); |
|---|
| 758 | + sock_enable_timestamp(sk, SOCK_TIMESTAMP); |
|---|
| 759 | + } else { |
|---|
| 760 | + sock_reset_flag(sk, SOCK_RCVTSTAMP); |
|---|
| 761 | + sock_reset_flag(sk, SOCK_RCVTSTAMPNS); |
|---|
| 762 | + } |
|---|
| 763 | +} |
|---|
| 764 | + |
|---|
| 765 | +void sock_enable_timestamps(struct sock *sk) |
|---|
| 766 | +{ |
|---|
| 767 | + lock_sock(sk); |
|---|
| 768 | + __sock_set_timestamps(sk, true, false, true); |
|---|
| 769 | + release_sock(sk); |
|---|
| 770 | +} |
|---|
| 771 | +EXPORT_SYMBOL(sock_enable_timestamps); |
|---|
| 772 | + |
|---|
| 773 | +void sock_set_keepalive(struct sock *sk) |
|---|
| 774 | +{ |
|---|
| 775 | + lock_sock(sk); |
|---|
| 776 | + if (sk->sk_prot->keepalive) |
|---|
| 777 | + sk->sk_prot->keepalive(sk, true); |
|---|
| 778 | + sock_valbool_flag(sk, SOCK_KEEPOPEN, true); |
|---|
| 779 | + release_sock(sk); |
|---|
| 780 | +} |
|---|
| 781 | +EXPORT_SYMBOL(sock_set_keepalive); |
|---|
| 782 | + |
|---|
| 783 | +static void __sock_set_rcvbuf(struct sock *sk, int val) |
|---|
| 784 | +{ |
|---|
| 785 | + /* Ensure val * 2 fits into an int, to prevent max_t() from treating it |
|---|
| 786 | + * as a negative value. |
|---|
| 787 | + */ |
|---|
| 788 | + val = min_t(int, val, INT_MAX / 2); |
|---|
| 789 | + sk->sk_userlocks |= SOCK_RCVBUF_LOCK; |
|---|
| 790 | + |
|---|
| 791 | + /* We double it on the way in to account for "struct sk_buff" etc. |
|---|
| 792 | + * overhead. Applications assume that the SO_RCVBUF setting they make |
|---|
| 793 | + * will allow that much actual data to be received on that socket. |
|---|
| 794 | + * |
|---|
| 795 | + * Applications are unaware that "struct sk_buff" and other overheads |
|---|
| 796 | + * allocate from the receive buffer during socket buffer allocation. |
|---|
| 797 | + * |
|---|
| 798 | + * And after considering the possible alternatives, returning the value |
|---|
| 799 | + * we actually used in getsockopt is the most desirable behavior. |
|---|
| 800 | + */ |
|---|
| 801 | + WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * 2, SOCK_MIN_RCVBUF)); |
|---|
| 802 | +} |
|---|
| 803 | + |
|---|
| 804 | +void sock_set_rcvbuf(struct sock *sk, int val) |
|---|
| 805 | +{ |
|---|
| 806 | + lock_sock(sk); |
|---|
| 807 | + __sock_set_rcvbuf(sk, val); |
|---|
| 808 | + release_sock(sk); |
|---|
| 809 | +} |
|---|
| 810 | +EXPORT_SYMBOL(sock_set_rcvbuf); |
|---|
| 811 | + |
|---|
| 812 | +static void __sock_set_mark(struct sock *sk, u32 val) |
|---|
| 813 | +{ |
|---|
| 814 | + if (val != sk->sk_mark) { |
|---|
| 815 | + sk->sk_mark = val; |
|---|
| 816 | + sk_dst_reset(sk); |
|---|
| 817 | + } |
|---|
| 818 | +} |
|---|
| 819 | + |
|---|
| 820 | +void sock_set_mark(struct sock *sk, u32 val) |
|---|
| 821 | +{ |
|---|
| 822 | + lock_sock(sk); |
|---|
| 823 | + __sock_set_mark(sk, val); |
|---|
| 824 | + release_sock(sk); |
|---|
| 825 | +} |
|---|
| 826 | +EXPORT_SYMBOL(sock_set_mark); |
|---|
| 827 | + |
|---|
| 648 | 828 | /* |
|---|
| 649 | 829 | * This is meant for all protocols to use and covers goings on |
|---|
| 650 | 830 | * at the socket level. Everything here is generic. |
|---|
| 651 | 831 | */ |
|---|
| 652 | 832 | |
|---|
| 653 | 833 | int sock_setsockopt(struct socket *sock, int level, int optname, |
|---|
| 654 | | - char __user *optval, unsigned int optlen) |
|---|
| 834 | + sockptr_t optval, unsigned int optlen) |
|---|
| 655 | 835 | { |
|---|
| 656 | 836 | struct sock_txtime sk_txtime; |
|---|
| 657 | 837 | struct sock *sk = sock->sk; |
|---|
| .. | .. |
|---|
| 670 | 850 | if (optlen < sizeof(int)) |
|---|
| 671 | 851 | return -EINVAL; |
|---|
| 672 | 852 | |
|---|
| 673 | | - if (get_user(val, (int __user *)optval)) |
|---|
| 853 | + if (copy_from_sockptr(&val, optval, sizeof(val))) |
|---|
| 674 | 854 | return -EFAULT; |
|---|
| 675 | 855 | |
|---|
| 676 | 856 | valbool = val ? 1 : 0; |
|---|
| .. | .. |
|---|
| 709 | 889 | * play 'guess the biggest size' games. RCVBUF/SNDBUF |
|---|
| 710 | 890 | * are treated in BSD as hints |
|---|
| 711 | 891 | */ |
|---|
| 712 | | - val = min_t(u32, val, sysctl_wmem_max); |
|---|
| 892 | + val = min_t(u32, val, READ_ONCE(sysctl_wmem_max)); |
|---|
| 713 | 893 | set_sndbuf: |
|---|
| 894 | + /* Ensure val * 2 fits into an int, to prevent max_t() |
|---|
| 895 | + * from treating it as a negative value. |
|---|
| 896 | + */ |
|---|
| 897 | + val = min_t(int, val, INT_MAX / 2); |
|---|
| 714 | 898 | sk->sk_userlocks |= SOCK_SNDBUF_LOCK; |
|---|
| 715 | | - sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF); |
|---|
| 899 | + WRITE_ONCE(sk->sk_sndbuf, |
|---|
| 900 | + max_t(int, val * 2, SOCK_MIN_SNDBUF)); |
|---|
| 716 | 901 | /* Wake up sending tasks if we upped the value. */ |
|---|
| 717 | 902 | sk->sk_write_space(sk); |
|---|
| 718 | 903 | break; |
|---|
| .. | .. |
|---|
| 722 | 907 | ret = -EPERM; |
|---|
| 723 | 908 | break; |
|---|
| 724 | 909 | } |
|---|
| 910 | + |
|---|
| 911 | + /* No negative values (to prevent underflow, as val will be |
|---|
| 912 | + * multiplied by 2). |
|---|
| 913 | + */ |
|---|
| 914 | + if (val < 0) |
|---|
| 915 | + val = 0; |
|---|
| 725 | 916 | goto set_sndbuf; |
|---|
| 726 | 917 | |
|---|
| 727 | 918 | case SO_RCVBUF: |
|---|
| .. | .. |
|---|
| 730 | 921 | * play 'guess the biggest size' games. RCVBUF/SNDBUF |
|---|
| 731 | 922 | * are treated in BSD as hints |
|---|
| 732 | 923 | */ |
|---|
| 733 | | - val = min_t(u32, val, sysctl_rmem_max); |
|---|
| 734 | | -set_rcvbuf: |
|---|
| 735 | | - sk->sk_userlocks |= SOCK_RCVBUF_LOCK; |
|---|
| 736 | | - /* |
|---|
| 737 | | - * We double it on the way in to account for |
|---|
| 738 | | - * "struct sk_buff" etc. overhead. Applications |
|---|
| 739 | | - * assume that the SO_RCVBUF setting they make will |
|---|
| 740 | | - * allow that much actual data to be received on that |
|---|
| 741 | | - * socket. |
|---|
| 742 | | - * |
|---|
| 743 | | - * Applications are unaware that "struct sk_buff" and |
|---|
| 744 | | - * other overheads allocate from the receive buffer |
|---|
| 745 | | - * during socket buffer allocation. |
|---|
| 746 | | - * |
|---|
| 747 | | - * And after considering the possible alternatives, |
|---|
| 748 | | - * returning the value we actually used in getsockopt |
|---|
| 749 | | - * is the most desirable behavior. |
|---|
| 750 | | - */ |
|---|
| 751 | | - sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF); |
|---|
| 924 | + __sock_set_rcvbuf(sk, min_t(u32, val, READ_ONCE(sysctl_rmem_max))); |
|---|
| 752 | 925 | break; |
|---|
| 753 | 926 | |
|---|
| 754 | 927 | case SO_RCVBUFFORCE: |
|---|
| .. | .. |
|---|
| 756 | 929 | ret = -EPERM; |
|---|
| 757 | 930 | break; |
|---|
| 758 | 931 | } |
|---|
| 759 | | - goto set_rcvbuf; |
|---|
| 932 | + |
|---|
| 933 | + /* No negative values (to prevent underflow, as val will be |
|---|
| 934 | + * multiplied by 2). |
|---|
| 935 | + */ |
|---|
| 936 | + __sock_set_rcvbuf(sk, max(val, 0)); |
|---|
| 937 | + break; |
|---|
| 760 | 938 | |
|---|
| 761 | 939 | case SO_KEEPALIVE: |
|---|
| 762 | 940 | if (sk->sk_prot->keepalive) |
|---|
| .. | .. |
|---|
| 785 | 963 | ret = -EINVAL; /* 1003.1g */ |
|---|
| 786 | 964 | break; |
|---|
| 787 | 965 | } |
|---|
| 788 | | - if (copy_from_user(&ling, optval, sizeof(ling))) { |
|---|
| 966 | + if (copy_from_sockptr(&ling, optval, sizeof(ling))) { |
|---|
| 789 | 967 | ret = -EFAULT; |
|---|
| 790 | 968 | break; |
|---|
| 791 | 969 | } |
|---|
| .. | .. |
|---|
| 803 | 981 | break; |
|---|
| 804 | 982 | |
|---|
| 805 | 983 | case SO_BSDCOMPAT: |
|---|
| 806 | | - sock_warn_obsolete_bsdism("setsockopt"); |
|---|
| 807 | 984 | break; |
|---|
| 808 | 985 | |
|---|
| 809 | 986 | case SO_PASSCRED: |
|---|
| .. | .. |
|---|
| 813 | 990 | clear_bit(SOCK_PASSCRED, &sock->flags); |
|---|
| 814 | 991 | break; |
|---|
| 815 | 992 | |
|---|
| 816 | | - case SO_TIMESTAMP: |
|---|
| 817 | | - case SO_TIMESTAMPNS: |
|---|
| 818 | | - if (valbool) { |
|---|
| 819 | | - if (optname == SO_TIMESTAMP) |
|---|
| 820 | | - sock_reset_flag(sk, SOCK_RCVTSTAMPNS); |
|---|
| 821 | | - else |
|---|
| 822 | | - sock_set_flag(sk, SOCK_RCVTSTAMPNS); |
|---|
| 823 | | - sock_set_flag(sk, SOCK_RCVTSTAMP); |
|---|
| 824 | | - sock_enable_timestamp(sk, SOCK_TIMESTAMP); |
|---|
| 825 | | - } else { |
|---|
| 826 | | - sock_reset_flag(sk, SOCK_RCVTSTAMP); |
|---|
| 827 | | - sock_reset_flag(sk, SOCK_RCVTSTAMPNS); |
|---|
| 828 | | - } |
|---|
| 993 | + case SO_TIMESTAMP_OLD: |
|---|
| 994 | + __sock_set_timestamps(sk, valbool, false, false); |
|---|
| 829 | 995 | break; |
|---|
| 830 | | - |
|---|
| 831 | | - case SO_TIMESTAMPING: |
|---|
| 996 | + case SO_TIMESTAMP_NEW: |
|---|
| 997 | + __sock_set_timestamps(sk, valbool, true, false); |
|---|
| 998 | + break; |
|---|
| 999 | + case SO_TIMESTAMPNS_OLD: |
|---|
| 1000 | + __sock_set_timestamps(sk, valbool, false, true); |
|---|
| 1001 | + break; |
|---|
| 1002 | + case SO_TIMESTAMPNS_NEW: |
|---|
| 1003 | + __sock_set_timestamps(sk, valbool, true, true); |
|---|
| 1004 | + break; |
|---|
| 1005 | + case SO_TIMESTAMPING_NEW: |
|---|
| 1006 | + case SO_TIMESTAMPING_OLD: |
|---|
| 832 | 1007 | if (val & ~SOF_TIMESTAMPING_MASK) { |
|---|
| 833 | 1008 | ret = -EINVAL; |
|---|
| 834 | 1009 | break; |
|---|
| .. | .. |
|---|
| 856 | 1031 | } |
|---|
| 857 | 1032 | |
|---|
| 858 | 1033 | sk->sk_tsflags = val; |
|---|
| 1034 | + sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW); |
|---|
| 1035 | + |
|---|
| 859 | 1036 | if (val & SOF_TIMESTAMPING_RX_SOFTWARE) |
|---|
| 860 | 1037 | sock_enable_timestamp(sk, |
|---|
| 861 | 1038 | SOCK_TIMESTAMPING_RX_SOFTWARE); |
|---|
| .. | .. |
|---|
| 870 | 1047 | if (sock->ops->set_rcvlowat) |
|---|
| 871 | 1048 | ret = sock->ops->set_rcvlowat(sk, val); |
|---|
| 872 | 1049 | else |
|---|
| 873 | | - sk->sk_rcvlowat = val ? : 1; |
|---|
| 1050 | + WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); |
|---|
| 874 | 1051 | break; |
|---|
| 875 | 1052 | |
|---|
| 876 | | - case SO_RCVTIMEO: |
|---|
| 877 | | - ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen); |
|---|
| 1053 | + case SO_RCVTIMEO_OLD: |
|---|
| 1054 | + case SO_RCVTIMEO_NEW: |
|---|
| 1055 | + ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, |
|---|
| 1056 | + optlen, optname == SO_RCVTIMEO_OLD); |
|---|
| 878 | 1057 | break; |
|---|
| 879 | 1058 | |
|---|
| 880 | | - case SO_SNDTIMEO: |
|---|
| 881 | | - ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen); |
|---|
| 1059 | + case SO_SNDTIMEO_OLD: |
|---|
| 1060 | + case SO_SNDTIMEO_NEW: |
|---|
| 1061 | + ret = sock_set_timeout(&sk->sk_sndtimeo, optval, |
|---|
| 1062 | + optlen, optname == SO_SNDTIMEO_OLD); |
|---|
| 882 | 1063 | break; |
|---|
| 883 | 1064 | |
|---|
| 884 | | - case SO_ATTACH_FILTER: |
|---|
| 885 | | - ret = -EINVAL; |
|---|
| 886 | | - if (optlen == sizeof(struct sock_fprog)) { |
|---|
| 887 | | - struct sock_fprog fprog; |
|---|
| 1065 | + case SO_ATTACH_FILTER: { |
|---|
| 1066 | + struct sock_fprog fprog; |
|---|
| 888 | 1067 | |
|---|
| 889 | | - ret = -EFAULT; |
|---|
| 890 | | - if (copy_from_user(&fprog, optval, sizeof(fprog))) |
|---|
| 891 | | - break; |
|---|
| 892 | | - |
|---|
| 1068 | + ret = copy_bpf_fprog_from_user(&fprog, optval, optlen); |
|---|
| 1069 | + if (!ret) |
|---|
| 893 | 1070 | ret = sk_attach_filter(&fprog, sk); |
|---|
| 894 | | - } |
|---|
| 895 | 1071 | break; |
|---|
| 896 | | - |
|---|
| 1072 | + } |
|---|
| 897 | 1073 | case SO_ATTACH_BPF: |
|---|
| 898 | 1074 | ret = -EINVAL; |
|---|
| 899 | 1075 | if (optlen == sizeof(u32)) { |
|---|
| 900 | 1076 | u32 ufd; |
|---|
| 901 | 1077 | |
|---|
| 902 | 1078 | ret = -EFAULT; |
|---|
| 903 | | - if (copy_from_user(&ufd, optval, sizeof(ufd))) |
|---|
| 1079 | + if (copy_from_sockptr(&ufd, optval, sizeof(ufd))) |
|---|
| 904 | 1080 | break; |
|---|
| 905 | 1081 | |
|---|
| 906 | 1082 | ret = sk_attach_bpf(ufd, sk); |
|---|
| 907 | 1083 | } |
|---|
| 908 | 1084 | break; |
|---|
| 909 | 1085 | |
|---|
| 910 | | - case SO_ATTACH_REUSEPORT_CBPF: |
|---|
| 911 | | - ret = -EINVAL; |
|---|
| 912 | | - if (optlen == sizeof(struct sock_fprog)) { |
|---|
| 913 | | - struct sock_fprog fprog; |
|---|
| 1086 | + case SO_ATTACH_REUSEPORT_CBPF: { |
|---|
| 1087 | + struct sock_fprog fprog; |
|---|
| 914 | 1088 | |
|---|
| 915 | | - ret = -EFAULT; |
|---|
| 916 | | - if (copy_from_user(&fprog, optval, sizeof(fprog))) |
|---|
| 917 | | - break; |
|---|
| 918 | | - |
|---|
| 1089 | + ret = copy_bpf_fprog_from_user(&fprog, optval, optlen); |
|---|
| 1090 | + if (!ret) |
|---|
| 919 | 1091 | ret = sk_reuseport_attach_filter(&fprog, sk); |
|---|
| 920 | | - } |
|---|
| 921 | 1092 | break; |
|---|
| 922 | | - |
|---|
| 1093 | + } |
|---|
| 923 | 1094 | case SO_ATTACH_REUSEPORT_EBPF: |
|---|
| 924 | 1095 | ret = -EINVAL; |
|---|
| 925 | 1096 | if (optlen == sizeof(u32)) { |
|---|
| 926 | 1097 | u32 ufd; |
|---|
| 927 | 1098 | |
|---|
| 928 | 1099 | ret = -EFAULT; |
|---|
| 929 | | - if (copy_from_user(&ufd, optval, sizeof(ufd))) |
|---|
| 1100 | + if (copy_from_sockptr(&ufd, optval, sizeof(ufd))) |
|---|
| 930 | 1101 | break; |
|---|
| 931 | 1102 | |
|---|
| 932 | 1103 | ret = sk_reuseport_attach_bpf(ufd, sk); |
|---|
| 933 | 1104 | } |
|---|
| 1105 | + break; |
|---|
| 1106 | + |
|---|
| 1107 | + case SO_DETACH_REUSEPORT_BPF: |
|---|
| 1108 | + ret = reuseport_detach_prog(sk); |
|---|
| 934 | 1109 | break; |
|---|
| 935 | 1110 | |
|---|
| 936 | 1111 | case SO_DETACH_FILTER: |
|---|
| .. | .. |
|---|
| 951 | 1126 | clear_bit(SOCK_PASSSEC, &sock->flags); |
|---|
| 952 | 1127 | break; |
|---|
| 953 | 1128 | case SO_MARK: |
|---|
| 954 | | - if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) |
|---|
| 1129 | + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { |
|---|
| 955 | 1130 | ret = -EPERM; |
|---|
| 956 | | - else |
|---|
| 957 | | - sk->sk_mark = val; |
|---|
| 1131 | + break; |
|---|
| 1132 | + } |
|---|
| 1133 | + |
|---|
| 1134 | + __sock_set_mark(sk, val); |
|---|
| 958 | 1135 | break; |
|---|
| 959 | 1136 | |
|---|
| 960 | 1137 | case SO_RXQ_OVFL: |
|---|
| .. | .. |
|---|
| 995 | 1172 | #endif |
|---|
| 996 | 1173 | |
|---|
| 997 | 1174 | case SO_MAX_PACING_RATE: |
|---|
| 998 | | - if (val != ~0U) |
|---|
| 1175 | + { |
|---|
| 1176 | + unsigned long ulval = (val == ~0U) ? ~0UL : (unsigned int)val; |
|---|
| 1177 | + |
|---|
| 1178 | + if (sizeof(ulval) != sizeof(val) && |
|---|
| 1179 | + optlen >= sizeof(ulval) && |
|---|
| 1180 | + copy_from_sockptr(&ulval, optval, sizeof(ulval))) { |
|---|
| 1181 | + ret = -EFAULT; |
|---|
| 1182 | + break; |
|---|
| 1183 | + } |
|---|
| 1184 | + if (ulval != ~0UL) |
|---|
| 999 | 1185 | cmpxchg(&sk->sk_pacing_status, |
|---|
| 1000 | 1186 | SK_PACING_NONE, |
|---|
| 1001 | 1187 | SK_PACING_NEEDED); |
|---|
| 1002 | | - sk->sk_max_pacing_rate = val; |
|---|
| 1003 | | - sk->sk_pacing_rate = min(sk->sk_pacing_rate, |
|---|
| 1004 | | - sk->sk_max_pacing_rate); |
|---|
| 1188 | + /* Pairs with READ_ONCE() from sk_getsockopt() */ |
|---|
| 1189 | + WRITE_ONCE(sk->sk_max_pacing_rate, ulval); |
|---|
| 1190 | + sk->sk_pacing_rate = min(sk->sk_pacing_rate, ulval); |
|---|
| 1005 | 1191 | break; |
|---|
| 1006 | | - |
|---|
| 1192 | + } |
|---|
| 1007 | 1193 | case SO_INCOMING_CPU: |
|---|
| 1008 | 1194 | WRITE_ONCE(sk->sk_incoming_cpu, val); |
|---|
| 1009 | 1195 | break; |
|---|
| .. | .. |
|---|
| 1015 | 1201 | |
|---|
| 1016 | 1202 | case SO_ZEROCOPY: |
|---|
| 1017 | 1203 | if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) { |
|---|
| 1018 | | - if (sk->sk_protocol != IPPROTO_TCP) |
|---|
| 1204 | + if (!((sk->sk_type == SOCK_STREAM && |
|---|
| 1205 | + sk->sk_protocol == IPPROTO_TCP) || |
|---|
| 1206 | + (sk->sk_type == SOCK_DGRAM && |
|---|
| 1207 | + sk->sk_protocol == IPPROTO_UDP))) |
|---|
| 1019 | 1208 | ret = -ENOTSUPP; |
|---|
| 1020 | 1209 | } else if (sk->sk_family != PF_RDS) { |
|---|
| 1021 | 1210 | ret = -ENOTSUPP; |
|---|
| .. | .. |
|---|
| 1029 | 1218 | break; |
|---|
| 1030 | 1219 | |
|---|
| 1031 | 1220 | case SO_TXTIME: |
|---|
| 1032 | | - if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { |
|---|
| 1033 | | - ret = -EPERM; |
|---|
| 1034 | | - } else if (optlen != sizeof(struct sock_txtime)) { |
|---|
| 1221 | + if (optlen != sizeof(struct sock_txtime)) { |
|---|
| 1035 | 1222 | ret = -EINVAL; |
|---|
| 1036 | | - } else if (copy_from_user(&sk_txtime, optval, |
|---|
| 1223 | + break; |
|---|
| 1224 | + } else if (copy_from_sockptr(&sk_txtime, optval, |
|---|
| 1037 | 1225 | sizeof(struct sock_txtime))) { |
|---|
| 1038 | 1226 | ret = -EFAULT; |
|---|
| 1227 | + break; |
|---|
| 1039 | 1228 | } else if (sk_txtime.flags & ~SOF_TXTIME_FLAGS_MASK) { |
|---|
| 1040 | 1229 | ret = -EINVAL; |
|---|
| 1041 | | - } else { |
|---|
| 1042 | | - sock_valbool_flag(sk, SOCK_TXTIME, true); |
|---|
| 1043 | | - sk->sk_clockid = sk_txtime.clockid; |
|---|
| 1044 | | - sk->sk_txtime_deadline_mode = |
|---|
| 1045 | | - !!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE); |
|---|
| 1046 | | - sk->sk_txtime_report_errors = |
|---|
| 1047 | | - !!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS); |
|---|
| 1230 | + break; |
|---|
| 1048 | 1231 | } |
|---|
| 1232 | + /* CLOCK_MONOTONIC is only used by sch_fq, and this packet |
|---|
| 1233 | + * scheduler has enough safe guards. |
|---|
| 1234 | + */ |
|---|
| 1235 | + if (sk_txtime.clockid != CLOCK_MONOTONIC && |
|---|
| 1236 | + !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { |
|---|
| 1237 | + ret = -EPERM; |
|---|
| 1238 | + break; |
|---|
| 1239 | + } |
|---|
| 1240 | + sock_valbool_flag(sk, SOCK_TXTIME, true); |
|---|
| 1241 | + sk->sk_clockid = sk_txtime.clockid; |
|---|
| 1242 | + sk->sk_txtime_deadline_mode = |
|---|
| 1243 | + !!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE); |
|---|
| 1244 | + sk->sk_txtime_report_errors = |
|---|
| 1245 | + !!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS); |
|---|
| 1246 | + break; |
|---|
| 1247 | + |
|---|
| 1248 | + case SO_BINDTOIFINDEX: |
|---|
| 1249 | + ret = sock_bindtoindex_locked(sk, val); |
|---|
| 1049 | 1250 | break; |
|---|
| 1050 | 1251 | |
|---|
| 1051 | 1252 | default: |
|---|
| .. | .. |
|---|
| 1101 | 1302 | union { |
|---|
| 1102 | 1303 | int val; |
|---|
| 1103 | 1304 | u64 val64; |
|---|
| 1305 | + unsigned long ulval; |
|---|
| 1104 | 1306 | struct linger ling; |
|---|
| 1105 | | - struct timeval tm; |
|---|
| 1307 | + struct old_timeval32 tm32; |
|---|
| 1308 | + struct __kernel_old_timeval tm; |
|---|
| 1309 | + struct __kernel_sock_timeval stm; |
|---|
| 1106 | 1310 | struct sock_txtime txtime; |
|---|
| 1107 | 1311 | } v; |
|---|
| 1108 | 1312 | |
|---|
| .. | .. |
|---|
| 1130 | 1334 | break; |
|---|
| 1131 | 1335 | |
|---|
| 1132 | 1336 | case SO_SNDBUF: |
|---|
| 1133 | | - v.val = sk->sk_sndbuf; |
|---|
| 1337 | + v.val = READ_ONCE(sk->sk_sndbuf); |
|---|
| 1134 | 1338 | break; |
|---|
| 1135 | 1339 | |
|---|
| 1136 | 1340 | case SO_RCVBUF: |
|---|
| 1137 | | - v.val = sk->sk_rcvbuf; |
|---|
| 1341 | + v.val = READ_ONCE(sk->sk_rcvbuf); |
|---|
| 1138 | 1342 | break; |
|---|
| 1139 | 1343 | |
|---|
| 1140 | 1344 | case SO_REUSEADDR: |
|---|
| .. | .. |
|---|
| 1186 | 1390 | break; |
|---|
| 1187 | 1391 | |
|---|
| 1188 | 1392 | case SO_BSDCOMPAT: |
|---|
| 1189 | | - sock_warn_obsolete_bsdism("getsockopt"); |
|---|
| 1190 | 1393 | break; |
|---|
| 1191 | 1394 | |
|---|
| 1192 | | - case SO_TIMESTAMP: |
|---|
| 1395 | + case SO_TIMESTAMP_OLD: |
|---|
| 1193 | 1396 | v.val = sock_flag(sk, SOCK_RCVTSTAMP) && |
|---|
| 1397 | + !sock_flag(sk, SOCK_TSTAMP_NEW) && |
|---|
| 1194 | 1398 | !sock_flag(sk, SOCK_RCVTSTAMPNS); |
|---|
| 1195 | 1399 | break; |
|---|
| 1196 | 1400 | |
|---|
| 1197 | | - case SO_TIMESTAMPNS: |
|---|
| 1198 | | - v.val = sock_flag(sk, SOCK_RCVTSTAMPNS); |
|---|
| 1401 | + case SO_TIMESTAMPNS_OLD: |
|---|
| 1402 | + v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && !sock_flag(sk, SOCK_TSTAMP_NEW); |
|---|
| 1199 | 1403 | break; |
|---|
| 1200 | 1404 | |
|---|
| 1201 | | - case SO_TIMESTAMPING: |
|---|
| 1405 | + case SO_TIMESTAMP_NEW: |
|---|
| 1406 | + v.val = sock_flag(sk, SOCK_RCVTSTAMP) && sock_flag(sk, SOCK_TSTAMP_NEW); |
|---|
| 1407 | + break; |
|---|
| 1408 | + |
|---|
| 1409 | + case SO_TIMESTAMPNS_NEW: |
|---|
| 1410 | + v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && sock_flag(sk, SOCK_TSTAMP_NEW); |
|---|
| 1411 | + break; |
|---|
| 1412 | + |
|---|
| 1413 | + case SO_TIMESTAMPING_OLD: |
|---|
| 1202 | 1414 | v.val = sk->sk_tsflags; |
|---|
| 1203 | 1415 | break; |
|---|
| 1204 | 1416 | |
|---|
| 1205 | | - case SO_RCVTIMEO: |
|---|
| 1206 | | - lv = sizeof(struct timeval); |
|---|
| 1207 | | - if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) { |
|---|
| 1208 | | - v.tm.tv_sec = 0; |
|---|
| 1209 | | - v.tm.tv_usec = 0; |
|---|
| 1210 | | - } else { |
|---|
| 1211 | | - v.tm.tv_sec = sk->sk_rcvtimeo / HZ; |
|---|
| 1212 | | - v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * USEC_PER_SEC) / HZ; |
|---|
| 1213 | | - } |
|---|
| 1417 | + case SO_RCVTIMEO_OLD: |
|---|
| 1418 | + case SO_RCVTIMEO_NEW: |
|---|
| 1419 | + lv = sock_get_timeout(sk->sk_rcvtimeo, &v, SO_RCVTIMEO_OLD == optname); |
|---|
| 1214 | 1420 | break; |
|---|
| 1215 | 1421 | |
|---|
| 1216 | | - case SO_SNDTIMEO: |
|---|
| 1217 | | - lv = sizeof(struct timeval); |
|---|
| 1218 | | - if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) { |
|---|
| 1219 | | - v.tm.tv_sec = 0; |
|---|
| 1220 | | - v.tm.tv_usec = 0; |
|---|
| 1221 | | - } else { |
|---|
| 1222 | | - v.tm.tv_sec = sk->sk_sndtimeo / HZ; |
|---|
| 1223 | | - v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * USEC_PER_SEC) / HZ; |
|---|
| 1224 | | - } |
|---|
| 1422 | + case SO_SNDTIMEO_OLD: |
|---|
| 1423 | + case SO_SNDTIMEO_NEW: |
|---|
| 1424 | + lv = sock_get_timeout(sk->sk_sndtimeo, &v, SO_SNDTIMEO_OLD == optname); |
|---|
| 1225 | 1425 | break; |
|---|
| 1226 | 1426 | |
|---|
| 1227 | 1427 | case SO_RCVLOWAT: |
|---|
| 1228 | | - v.val = sk->sk_rcvlowat; |
|---|
| 1428 | + v.val = READ_ONCE(sk->sk_rcvlowat); |
|---|
| 1229 | 1429 | break; |
|---|
| 1230 | 1430 | |
|---|
| 1231 | 1431 | case SO_SNDLOWAT: |
|---|
| .. | .. |
|---|
| 1319 | 1519 | if (!sock->ops->set_peek_off) |
|---|
| 1320 | 1520 | return -EOPNOTSUPP; |
|---|
| 1321 | 1521 | |
|---|
| 1322 | | - v.val = sk->sk_peek_off; |
|---|
| 1522 | + v.val = READ_ONCE(sk->sk_peek_off); |
|---|
| 1323 | 1523 | break; |
|---|
| 1324 | 1524 | case SO_NOFCS: |
|---|
| 1325 | 1525 | v.val = sock_flag(sk, SOCK_NOFCS); |
|---|
| .. | .. |
|---|
| 1349 | 1549 | |
|---|
| 1350 | 1550 | #ifdef CONFIG_NET_RX_BUSY_POLL |
|---|
| 1351 | 1551 | case SO_BUSY_POLL: |
|---|
| 1352 | | - v.val = sk->sk_ll_usec; |
|---|
| 1552 | + v.val = READ_ONCE(sk->sk_ll_usec); |
|---|
| 1353 | 1553 | break; |
|---|
| 1354 | 1554 | #endif |
|---|
| 1355 | 1555 | |
|---|
| 1356 | 1556 | case SO_MAX_PACING_RATE: |
|---|
| 1357 | | - v.val = sk->sk_max_pacing_rate; |
|---|
| 1557 | + /* The READ_ONCE() calls below pair with the WRITE_ONCE() in sk_setsockopt() */ |
|---|
| 1558 | + if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) { |
|---|
| 1559 | + lv = sizeof(v.ulval); |
|---|
| 1560 | + v.ulval = READ_ONCE(sk->sk_max_pacing_rate); |
|---|
| 1561 | + } else { |
|---|
| 1562 | + /* 32bit version */ |
|---|
| 1563 | + v.val = min_t(unsigned long, ~0U, |
|---|
| 1564 | + READ_ONCE(sk->sk_max_pacing_rate)); |
|---|
| 1565 | + } |
|---|
| 1358 | 1566 | break; |
|---|
| 1359 | 1567 | |
|---|
| 1360 | 1568 | case SO_INCOMING_CPU: |
|---|
| .. | .. |
|---|
| 1405 | 1613 | SOF_TXTIME_REPORT_ERRORS : 0; |
|---|
| 1406 | 1614 | break; |
|---|
| 1407 | 1615 | |
|---|
| 1616 | + case SO_BINDTOIFINDEX: |
|---|
| 1617 | + v.val = sk->sk_bound_dev_if; |
|---|
| 1618 | + break; |
|---|
| 1619 | + |
|---|
| 1620 | + case SO_NETNS_COOKIE: |
|---|
| 1621 | + lv = sizeof(u64); |
|---|
| 1622 | + if (len != lv) |
|---|
| 1623 | + return -EINVAL; |
|---|
| 1624 | + v.val64 = atomic64_read(&sock_net(sk)->net_cookie); |
|---|
| 1625 | + break; |
|---|
| 1626 | + |
|---|
| 1408 | 1627 | default: |
|---|
| 1409 | 1628 | /* We implement the SO_SNDLOWAT etc to not be settable |
|---|
| 1410 | 1629 | * (1003.1g 7). |
|---|
| .. | .. |
|---|
| 1452 | 1671 | */ |
|---|
| 1453 | 1672 | static void sock_copy(struct sock *nsk, const struct sock *osk) |
|---|
| 1454 | 1673 | { |
|---|
| 1674 | + const struct proto *prot = READ_ONCE(osk->sk_prot); |
|---|
| 1455 | 1675 | #ifdef CONFIG_SECURITY_NETWORK |
|---|
| 1456 | 1676 | void *sptr = nsk->sk_security; |
|---|
| 1457 | 1677 | #endif |
|---|
| 1458 | 1678 | memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin)); |
|---|
| 1459 | 1679 | |
|---|
| 1460 | 1680 | memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end, |
|---|
| 1461 | | - osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end)); |
|---|
| 1681 | + prot->obj_size - offsetof(struct sock, sk_dontcopy_end)); |
|---|
| 1462 | 1682 | |
|---|
| 1463 | 1683 | #ifdef CONFIG_SECURITY_NETWORK |
|---|
| 1464 | 1684 | nsk->sk_security = sptr; |
|---|
| .. | .. |
|---|
| 1584 | 1804 | |
|---|
| 1585 | 1805 | sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP); |
|---|
| 1586 | 1806 | |
|---|
| 1807 | +#ifdef CONFIG_BPF_SYSCALL |
|---|
| 1808 | + bpf_sk_storage_free(sk); |
|---|
| 1809 | +#endif |
|---|
| 1810 | + |
|---|
| 1587 | 1811 | if (atomic_read(&sk->sk_omem_alloc)) |
|---|
| 1588 | 1812 | pr_debug("%s: optmem leakage (%d bytes) detected\n", |
|---|
| 1589 | 1813 | __func__, atomic_read(&sk->sk_omem_alloc)); |
|---|
| .. | .. |
|---|
| 1670 | 1894 | */ |
|---|
| 1671 | 1895 | struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) |
|---|
| 1672 | 1896 | { |
|---|
| 1673 | | - struct sock *newsk; |
|---|
| 1897 | + struct proto *prot = READ_ONCE(sk->sk_prot); |
|---|
| 1898 | + struct sk_filter *filter; |
|---|
| 1674 | 1899 | bool is_charged = true; |
|---|
| 1900 | + struct sock *newsk; |
|---|
| 1675 | 1901 | |
|---|
| 1676 | | - newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family); |
|---|
| 1677 | | - if (newsk != NULL) { |
|---|
| 1678 | | - struct sk_filter *filter; |
|---|
| 1902 | + newsk = sk_prot_alloc(prot, priority, sk->sk_family); |
|---|
| 1903 | + if (!newsk) |
|---|
| 1904 | + goto out; |
|---|
| 1679 | 1905 | |
|---|
| 1680 | | - sock_copy(newsk, sk); |
|---|
| 1906 | + sock_copy(newsk, sk); |
|---|
| 1681 | 1907 | |
|---|
| 1682 | | - newsk->sk_prot_creator = sk->sk_prot; |
|---|
| 1908 | + newsk->sk_prot_creator = prot; |
|---|
| 1683 | 1909 | |
|---|
| 1684 | | - /* SANITY */ |
|---|
| 1685 | | - if (likely(newsk->sk_net_refcnt)) |
|---|
| 1686 | | - get_net(sock_net(newsk)); |
|---|
| 1687 | | - sk_node_init(&newsk->sk_node); |
|---|
| 1688 | | - sock_lock_init(newsk); |
|---|
| 1689 | | - bh_lock_sock(newsk); |
|---|
| 1690 | | - newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL; |
|---|
| 1691 | | - newsk->sk_backlog.len = 0; |
|---|
| 1692 | | - |
|---|
| 1693 | | - atomic_set(&newsk->sk_rmem_alloc, 0); |
|---|
| 1694 | | - /* |
|---|
| 1695 | | - * sk_wmem_alloc set to one (see sk_free() and sock_wfree()) |
|---|
| 1696 | | - */ |
|---|
| 1697 | | - refcount_set(&newsk->sk_wmem_alloc, 1); |
|---|
| 1698 | | - atomic_set(&newsk->sk_omem_alloc, 0); |
|---|
| 1699 | | - sk_init_common(newsk); |
|---|
| 1700 | | - |
|---|
| 1701 | | - newsk->sk_dst_cache = NULL; |
|---|
| 1702 | | - newsk->sk_dst_pending_confirm = 0; |
|---|
| 1703 | | - newsk->sk_wmem_queued = 0; |
|---|
| 1704 | | - newsk->sk_forward_alloc = 0; |
|---|
| 1705 | | - atomic_set(&newsk->sk_drops, 0); |
|---|
| 1706 | | - newsk->sk_send_head = NULL; |
|---|
| 1707 | | - newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK; |
|---|
| 1708 | | - atomic_set(&newsk->sk_zckey, 0); |
|---|
| 1709 | | - |
|---|
| 1710 | | - sock_reset_flag(newsk, SOCK_DONE); |
|---|
| 1711 | | - |
|---|
| 1712 | | - /* sk->sk_memcg will be populated at accept() time */ |
|---|
| 1713 | | - newsk->sk_memcg = NULL; |
|---|
| 1714 | | - |
|---|
| 1715 | | - cgroup_sk_clone(&newsk->sk_cgrp_data); |
|---|
| 1716 | | - |
|---|
| 1717 | | - rcu_read_lock(); |
|---|
| 1718 | | - filter = rcu_dereference(sk->sk_filter); |
|---|
| 1719 | | - if (filter != NULL) |
|---|
| 1720 | | - /* though it's an empty new sock, the charging may fail |
|---|
| 1721 | | - * if sysctl_optmem_max was changed between creation of |
|---|
| 1722 | | - * original socket and cloning |
|---|
| 1723 | | - */ |
|---|
| 1724 | | - is_charged = sk_filter_charge(newsk, filter); |
|---|
| 1725 | | - RCU_INIT_POINTER(newsk->sk_filter, filter); |
|---|
| 1726 | | - rcu_read_unlock(); |
|---|
| 1727 | | - |
|---|
| 1728 | | - if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) { |
|---|
| 1729 | | - /* We need to make sure that we don't uncharge the new |
|---|
| 1730 | | - * socket if we couldn't charge it in the first place |
|---|
| 1731 | | - * as otherwise we uncharge the parent's filter. |
|---|
| 1732 | | - */ |
|---|
| 1733 | | - if (!is_charged) |
|---|
| 1734 | | - RCU_INIT_POINTER(newsk->sk_filter, NULL); |
|---|
| 1735 | | - sk_free_unlock_clone(newsk); |
|---|
| 1736 | | - newsk = NULL; |
|---|
| 1737 | | - goto out; |
|---|
| 1738 | | - } |
|---|
| 1739 | | - RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL); |
|---|
| 1740 | | - |
|---|
| 1741 | | - newsk->sk_err = 0; |
|---|
| 1742 | | - newsk->sk_err_soft = 0; |
|---|
| 1743 | | - newsk->sk_priority = 0; |
|---|
| 1744 | | - newsk->sk_incoming_cpu = raw_smp_processor_id(); |
|---|
| 1745 | | - atomic64_set(&newsk->sk_cookie, 0); |
|---|
| 1746 | | - if (likely(newsk->sk_net_refcnt)) |
|---|
| 1747 | | - sock_inuse_add(sock_net(newsk), 1); |
|---|
| 1748 | | - |
|---|
| 1749 | | - /* |
|---|
| 1750 | | - * Before updating sk_refcnt, we must commit prior changes to memory |
|---|
| 1751 | | - * (Documentation/RCU/rculist_nulls.txt for details) |
|---|
| 1752 | | - */ |
|---|
| 1753 | | - smp_wmb(); |
|---|
| 1754 | | - refcount_set(&newsk->sk_refcnt, 2); |
|---|
| 1755 | | - |
|---|
| 1756 | | - /* |
|---|
| 1757 | | - * Increment the counter in the same struct proto as the master |
|---|
| 1758 | | - * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that |
|---|
| 1759 | | - * is the same as sk->sk_prot->socks, as this field was copied |
|---|
| 1760 | | - * with memcpy). |
|---|
| 1761 | | - * |
|---|
| 1762 | | - * This _changes_ the previous behaviour, where |
|---|
| 1763 | | - * tcp_create_openreq_child always was incrementing the |
|---|
| 1764 | | - * equivalent to tcp_prot->socks (inet_sock_nr), so this have |
|---|
| 1765 | | - * to be taken into account in all callers. -acme |
|---|
| 1766 | | - */ |
|---|
| 1767 | | - sk_refcnt_debug_inc(newsk); |
|---|
| 1768 | | - sk_set_socket(newsk, NULL); |
|---|
| 1769 | | - sk_tx_queue_clear(newsk); |
|---|
| 1770 | | - newsk->sk_wq = NULL; |
|---|
| 1771 | | - |
|---|
| 1772 | | - if (newsk->sk_prot->sockets_allocated) |
|---|
| 1773 | | - sk_sockets_allocated_inc(newsk); |
|---|
| 1774 | | - |
|---|
| 1775 | | - if (sock_needs_netstamp(sk) && |
|---|
| 1776 | | - newsk->sk_flags & SK_FLAGS_TIMESTAMP) |
|---|
| 1777 | | - net_enable_timestamp(); |
|---|
| 1910 | + /* SANITY */ |
|---|
| 1911 | + if (likely(newsk->sk_net_refcnt)) { |
|---|
| 1912 | + get_net(sock_net(newsk)); |
|---|
| 1913 | + sock_inuse_add(sock_net(newsk), 1); |
|---|
| 1778 | 1914 | } |
|---|
| 1915 | + sk_node_init(&newsk->sk_node); |
|---|
| 1916 | + sock_lock_init(newsk); |
|---|
| 1917 | + bh_lock_sock(newsk); |
|---|
| 1918 | + newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL; |
|---|
| 1919 | + newsk->sk_backlog.len = 0; |
|---|
| 1920 | + |
|---|
| 1921 | + atomic_set(&newsk->sk_rmem_alloc, 0); |
|---|
| 1922 | + |
|---|
| 1923 | + /* sk_wmem_alloc set to one (see sk_free() and sock_wfree()) */ |
|---|
| 1924 | + refcount_set(&newsk->sk_wmem_alloc, 1); |
|---|
| 1925 | + |
|---|
| 1926 | + atomic_set(&newsk->sk_omem_alloc, 0); |
|---|
| 1927 | + sk_init_common(newsk); |
|---|
| 1928 | + |
|---|
| 1929 | + newsk->sk_dst_cache = NULL; |
|---|
| 1930 | + newsk->sk_dst_pending_confirm = 0; |
|---|
| 1931 | + newsk->sk_wmem_queued = 0; |
|---|
| 1932 | + newsk->sk_forward_alloc = 0; |
|---|
| 1933 | + atomic_set(&newsk->sk_drops, 0); |
|---|
| 1934 | + newsk->sk_send_head = NULL; |
|---|
| 1935 | + newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK; |
|---|
| 1936 | + atomic_set(&newsk->sk_zckey, 0); |
|---|
| 1937 | + |
|---|
| 1938 | + sock_reset_flag(newsk, SOCK_DONE); |
|---|
| 1939 | + |
|---|
| 1940 | + /* sk->sk_memcg will be populated at accept() time */ |
|---|
| 1941 | + newsk->sk_memcg = NULL; |
|---|
| 1942 | + |
|---|
| 1943 | + cgroup_sk_clone(&newsk->sk_cgrp_data); |
|---|
| 1944 | + |
|---|
| 1945 | + rcu_read_lock(); |
|---|
| 1946 | + filter = rcu_dereference(sk->sk_filter); |
|---|
| 1947 | + if (filter != NULL) |
|---|
| 1948 | + /* though it's an empty new sock, the charging may fail |
|---|
| 1949 | + * if sysctl_optmem_max was changed between creation of |
|---|
| 1950 | + * original socket and cloning |
|---|
| 1951 | + */ |
|---|
| 1952 | + is_charged = sk_filter_charge(newsk, filter); |
|---|
| 1953 | + RCU_INIT_POINTER(newsk->sk_filter, filter); |
|---|
| 1954 | + rcu_read_unlock(); |
|---|
| 1955 | + |
|---|
| 1956 | + if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) { |
|---|
| 1957 | + /* We need to make sure that we don't uncharge the new |
|---|
| 1958 | + * socket if we couldn't charge it in the first place |
|---|
| 1959 | + * as otherwise we uncharge the parent's filter. |
|---|
| 1960 | + */ |
|---|
| 1961 | + if (!is_charged) |
|---|
| 1962 | + RCU_INIT_POINTER(newsk->sk_filter, NULL); |
|---|
| 1963 | + sk_free_unlock_clone(newsk); |
|---|
| 1964 | + newsk = NULL; |
|---|
| 1965 | + goto out; |
|---|
| 1966 | + } |
|---|
| 1967 | + RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL); |
|---|
| 1968 | + |
|---|
| 1969 | + if (bpf_sk_storage_clone(sk, newsk)) { |
|---|
| 1970 | + sk_free_unlock_clone(newsk); |
|---|
| 1971 | + newsk = NULL; |
|---|
| 1972 | + goto out; |
|---|
| 1973 | + } |
|---|
| 1974 | + |
|---|
| 1975 | + /* Clear sk_user_data if parent had the pointer tagged |
|---|
| 1976 | + * as not suitable for copying when cloning. |
|---|
| 1977 | + */ |
|---|
| 1978 | + if (sk_user_data_is_nocopy(newsk)) |
|---|
| 1979 | + newsk->sk_user_data = NULL; |
|---|
| 1980 | + |
|---|
| 1981 | + newsk->sk_err = 0; |
|---|
| 1982 | + newsk->sk_err_soft = 0; |
|---|
| 1983 | + newsk->sk_priority = 0; |
|---|
| 1984 | + newsk->sk_incoming_cpu = raw_smp_processor_id(); |
|---|
| 1985 | + |
|---|
| 1986 | + /* Before updating sk_refcnt, we must commit prior changes to memory |
|---|
| 1987 | + * (Documentation/RCU/rculist_nulls.rst for details) |
|---|
| 1988 | + */ |
|---|
| 1989 | + smp_wmb(); |
|---|
| 1990 | + refcount_set(&newsk->sk_refcnt, 2); |
|---|
| 1991 | + |
|---|
| 1992 | + /* Increment the counter in the same struct proto as the master |
|---|
| 1993 | + * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that |
|---|
| 1994 | + * is the same as sk->sk_prot->socks, as this field was copied |
|---|
| 1995 | + * with memcpy). |
|---|
| 1996 | + * |
|---|
| 1997 | + * This _changes_ the previous behaviour, where |
|---|
| 1998 | + * tcp_create_openreq_child always was incrementing the |
|---|
| 1999 | + * equivalent to tcp_prot->socks (inet_sock_nr), so this has |
|---|
| 2000 | + * to be taken into account in all callers. -acme |
|---|
| 2001 | + */ |
|---|
| 2002 | + sk_refcnt_debug_inc(newsk); |
|---|
| 2003 | + sk_set_socket(newsk, NULL); |
|---|
| 2004 | + sk_tx_queue_clear(newsk); |
|---|
| 2005 | + RCU_INIT_POINTER(newsk->sk_wq, NULL); |
|---|
| 2006 | + |
|---|
| 2007 | + if (newsk->sk_prot->sockets_allocated) |
|---|
| 2008 | + sk_sockets_allocated_inc(newsk); |
|---|
| 2009 | + |
|---|
| 2010 | + if (sock_needs_netstamp(sk) && newsk->sk_flags & SK_FLAGS_TIMESTAMP) |
|---|
| 2011 | + net_enable_timestamp(); |
|---|
| 1779 | 2012 | out: |
|---|
| 1780 | 2013 | return newsk; |
|---|
| 1781 | 2014 | } |
|---|
| .. | .. |
|---|
| 1795 | 2028 | { |
|---|
| 1796 | 2029 | u32 max_segs = 1; |
|---|
| 1797 | 2030 | |
|---|
| 1798 | | - sk_dst_set(sk, dst); |
|---|
| 1799 | 2031 | sk->sk_route_caps = dst->dev->features | sk->sk_route_forced_caps; |
|---|
| 1800 | 2032 | if (sk->sk_route_caps & NETIF_F_GSO) |
|---|
| 1801 | 2033 | sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; |
|---|
| .. | .. |
|---|
| 1810 | 2042 | } |
|---|
| 1811 | 2043 | } |
|---|
| 1812 | 2044 | sk->sk_gso_max_segs = max_segs; |
|---|
| 2045 | + sk_dst_set(sk, dst); |
|---|
| 1813 | 2046 | } |
|---|
| 1814 | 2047 | EXPORT_SYMBOL_GPL(sk_setup_caps); |
|---|
| 1815 | 2048 | |
|---|
| .. | .. |
|---|
| 1877 | 2110 | } |
|---|
| 1878 | 2111 | EXPORT_SYMBOL(skb_set_owner_w); |
|---|
| 1879 | 2112 | |
|---|
| 2113 | +static bool can_skb_orphan_partial(const struct sk_buff *skb) |
|---|
| 2114 | +{ |
|---|
| 2115 | +#ifdef CONFIG_TLS_DEVICE |
|---|
| 2116 | + /* Drivers depend on in-order delivery for crypto offload, |
|---|
| 2117 | + * partial orphan breaks out-of-order-OK logic. |
|---|
| 2118 | + */ |
|---|
| 2119 | + if (skb->decrypted) |
|---|
| 2120 | + return false; |
|---|
| 2121 | +#endif |
|---|
| 2122 | + return (skb->destructor == sock_wfree || |
|---|
| 2123 | + (IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree)); |
|---|
| 2124 | +} |
|---|
| 2125 | + |
|---|
| 1880 | 2126 | /* This helper is used by netem, as it can hold packets in its |
|---|
| 1881 | 2127 | * delay queue. We want to allow the owner socket to send more |
|---|
| 1882 | 2128 | * packets, as if they were already TX completed by a typical driver. |
|---|
| .. | .. |
|---|
| 1888 | 2134 | if (skb_is_tcp_pure_ack(skb)) |
|---|
| 1889 | 2135 | return; |
|---|
| 1890 | 2136 | |
|---|
| 1891 | | - if (skb->destructor == sock_wfree |
|---|
| 1892 | | -#ifdef CONFIG_INET |
|---|
| 1893 | | - || skb->destructor == tcp_wfree |
|---|
| 1894 | | -#endif |
|---|
| 1895 | | - ) { |
|---|
| 1896 | | - struct sock *sk = skb->sk; |
|---|
| 2137 | + if (can_skb_orphan_partial(skb) && skb_set_owner_sk_safe(skb, skb->sk)) |
|---|
| 2138 | + return; |
|---|
| 1897 | 2139 | |
|---|
| 1898 | | - if (refcount_inc_not_zero(&sk->sk_refcnt)) { |
|---|
| 1899 | | - WARN_ON(refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc)); |
|---|
| 1900 | | - skb->destructor = sock_efree; |
|---|
| 1901 | | - } |
|---|
| 1902 | | - } else { |
|---|
| 1903 | | - skb_orphan(skb); |
|---|
| 1904 | | - } |
|---|
| 2140 | + skb_orphan(skb); |
|---|
| 1905 | 2141 | } |
|---|
| 1906 | 2142 | EXPORT_SYMBOL(skb_orphan_partial); |
|---|
| 1907 | 2143 | |
|---|
| .. | .. |
|---|
| 1928 | 2164 | } |
|---|
| 1929 | 2165 | EXPORT_SYMBOL(sock_efree); |
|---|
| 1930 | 2166 | |
|---|
| 2167 | +/* Buffer destructor for prefetch/receive path where reference count may |
|---|
| 2168 | + * not be held, e.g. for listen sockets. |
|---|
| 2169 | + */ |
|---|
| 2170 | +#ifdef CONFIG_INET |
|---|
| 2171 | +void sock_pfree(struct sk_buff *skb) |
|---|
| 2172 | +{ |
|---|
| 2173 | + if (sk_is_refcounted(skb->sk)) |
|---|
| 2174 | + sock_gen_put(skb->sk); |
|---|
| 2175 | +} |
|---|
| 2176 | +EXPORT_SYMBOL(sock_pfree); |
|---|
| 2177 | +#endif /* CONFIG_INET */ |
|---|
| 2178 | + |
|---|
| 1931 | 2179 | kuid_t sock_i_uid(struct sock *sk) |
|---|
| 1932 | 2180 | { |
|---|
| 1933 | 2181 | kuid_t uid; |
|---|
| .. | .. |
|---|
| 1939 | 2187 | } |
|---|
| 1940 | 2188 | EXPORT_SYMBOL(sock_i_uid); |
|---|
| 1941 | 2189 | |
|---|
| 2190 | +unsigned long __sock_i_ino(struct sock *sk) |
|---|
| 2191 | +{ |
|---|
| 2192 | + unsigned long ino; |
|---|
| 2193 | + |
|---|
| 2194 | + read_lock(&sk->sk_callback_lock); |
|---|
| 2195 | + ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0; |
|---|
| 2196 | + read_unlock(&sk->sk_callback_lock); |
|---|
| 2197 | + return ino; |
|---|
| 2198 | +} |
|---|
| 2199 | +EXPORT_SYMBOL(__sock_i_ino); |
|---|
| 2200 | + |
|---|
| 1942 | 2201 | unsigned long sock_i_ino(struct sock *sk) |
|---|
| 1943 | 2202 | { |
|---|
| 1944 | 2203 | unsigned long ino; |
|---|
| 1945 | 2204 | |
|---|
| 1946 | | - read_lock_bh(&sk->sk_callback_lock); |
|---|
| 1947 | | - ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0; |
|---|
| 1948 | | - read_unlock_bh(&sk->sk_callback_lock); |
|---|
| 2205 | + local_bh_disable(); |
|---|
| 2206 | + ino = __sock_i_ino(sk); |
|---|
| 2207 | + local_bh_enable(); |
|---|
| 1949 | 2208 | return ino; |
|---|
| 1950 | 2209 | } |
|---|
| 1951 | 2210 | EXPORT_SYMBOL(sock_i_ino); |
|---|
| .. | .. |
|---|
| 1956 | 2215 | struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, |
|---|
| 1957 | 2216 | gfp_t priority) |
|---|
| 1958 | 2217 | { |
|---|
| 1959 | | - if (force || refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { |
|---|
| 2218 | + if (force || |
|---|
| 2219 | + refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf)) { |
|---|
| 1960 | 2220 | struct sk_buff *skb = alloc_skb(size, priority); |
|---|
| 2221 | + |
|---|
| 1961 | 2222 | if (skb) { |
|---|
| 1962 | 2223 | skb_set_owner_w(skb, sk); |
|---|
| 1963 | 2224 | return skb; |
|---|
| .. | .. |
|---|
| 1981 | 2242 | |
|---|
| 1982 | 2243 | /* small safe race: SKB_TRUESIZE may differ from final skb->truesize */ |
|---|
| 1983 | 2244 | if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) > |
|---|
| 1984 | | - sysctl_optmem_max) |
|---|
| 2245 | + READ_ONCE(sysctl_optmem_max)) |
|---|
| 1985 | 2246 | return NULL; |
|---|
| 1986 | 2247 | |
|---|
| 1987 | 2248 | skb = alloc_skb(size, priority); |
|---|
| .. | .. |
|---|
| 1999 | 2260 | */ |
|---|
| 2000 | 2261 | void *sock_kmalloc(struct sock *sk, int size, gfp_t priority) |
|---|
| 2001 | 2262 | { |
|---|
| 2002 | | - if ((unsigned int)size <= sysctl_optmem_max && |
|---|
| 2003 | | - atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) { |
|---|
| 2263 | + int optmem_max = READ_ONCE(sysctl_optmem_max); |
|---|
| 2264 | + |
|---|
| 2265 | + if ((unsigned int)size <= optmem_max && |
|---|
| 2266 | + atomic_read(&sk->sk_omem_alloc) + size < optmem_max) { |
|---|
| 2004 | 2267 | void *mem; |
|---|
| 2005 | 2268 | /* First do the add, to avoid the race if kmalloc |
|---|
| 2006 | 2269 | * might sleep. |
|---|
| .. | .. |
|---|
| 2025 | 2288 | if (WARN_ON_ONCE(!mem)) |
|---|
| 2026 | 2289 | return; |
|---|
| 2027 | 2290 | if (nullify) |
|---|
| 2028 | | - kzfree(mem); |
|---|
| 2291 | + kfree_sensitive(mem); |
|---|
| 2029 | 2292 | else |
|---|
| 2030 | 2293 | kfree(mem); |
|---|
| 2031 | 2294 | atomic_sub(size, &sk->sk_omem_alloc); |
|---|
| .. | .. |
|---|
| 2058 | 2321 | break; |
|---|
| 2059 | 2322 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); |
|---|
| 2060 | 2323 | prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); |
|---|
| 2061 | | - if (refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) |
|---|
| 2324 | + if (refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf)) |
|---|
| 2062 | 2325 | break; |
|---|
| 2063 | | - if (sk->sk_shutdown & SEND_SHUTDOWN) |
|---|
| 2326 | + if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) |
|---|
| 2064 | 2327 | break; |
|---|
| 2065 | | - if (sk->sk_err) |
|---|
| 2328 | + if (READ_ONCE(sk->sk_err)) |
|---|
| 2066 | 2329 | break; |
|---|
| 2067 | 2330 | timeo = schedule_timeout(timeo); |
|---|
| 2068 | 2331 | } |
|---|
| .. | .. |
|---|
| 2090 | 2353 | goto failure; |
|---|
| 2091 | 2354 | |
|---|
| 2092 | 2355 | err = -EPIPE; |
|---|
| 2093 | | - if (sk->sk_shutdown & SEND_SHUTDOWN) |
|---|
| 2356 | + if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) |
|---|
| 2094 | 2357 | goto failure; |
|---|
| 2095 | 2358 | |
|---|
| 2096 | | - if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf) |
|---|
| 2359 | + if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf)) |
|---|
| 2097 | 2360 | break; |
|---|
| 2098 | 2361 | |
|---|
| 2099 | 2362 | sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); |
|---|
| .. | .. |
|---|
| 2139 | 2402 | return -EINVAL; |
|---|
| 2140 | 2403 | sockc->mark = *(u32 *)CMSG_DATA(cmsg); |
|---|
| 2141 | 2404 | break; |
|---|
| 2142 | | - case SO_TIMESTAMPING: |
|---|
| 2405 | + case SO_TIMESTAMPING_OLD: |
|---|
| 2143 | 2406 | if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) |
|---|
| 2144 | 2407 | return -EINVAL; |
|---|
| 2145 | 2408 | |
|---|
| .. | .. |
|---|
| 2207 | 2470 | } |
|---|
| 2208 | 2471 | } |
|---|
| 2209 | 2472 | |
|---|
| 2210 | | -/* On 32bit arches, an skb frag is limited to 2^15 */ |
|---|
| 2211 | 2473 | #define SKB_FRAG_PAGE_ORDER get_order(32768) |
|---|
| 2474 | +DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key); |
|---|
| 2212 | 2475 | |
|---|
| 2213 | 2476 | /** |
|---|
| 2214 | 2477 | * skb_page_frag_refill - check that a page_frag contains enough room |
|---|
| .. | .. |
|---|
| 2233 | 2496 | } |
|---|
| 2234 | 2497 | |
|---|
| 2235 | 2498 | pfrag->offset = 0; |
|---|
| 2236 | | - if (SKB_FRAG_PAGE_ORDER) { |
|---|
| 2499 | + if (SKB_FRAG_PAGE_ORDER && |
|---|
| 2500 | + !static_branch_unlikely(&net_high_order_alloc_disable_key)) { |
|---|
| 2237 | 2501 | /* Avoid direct reclaim but allow kswapd to wake */ |
|---|
| 2238 | 2502 | pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) | |
|---|
| 2239 | 2503 | __GFP_COMP | __GFP_NOWARN | |
|---|
| .. | .. |
|---|
| 2263 | 2527 | return false; |
|---|
| 2264 | 2528 | } |
|---|
| 2265 | 2529 | EXPORT_SYMBOL(sk_page_frag_refill); |
|---|
| 2266 | | - |
|---|
| 2267 | | -int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg, |
|---|
| 2268 | | - int sg_start, int *sg_curr_index, unsigned int *sg_curr_size, |
|---|
| 2269 | | - int first_coalesce) |
|---|
| 2270 | | -{ |
|---|
| 2271 | | - int sg_curr = *sg_curr_index, use = 0, rc = 0; |
|---|
| 2272 | | - unsigned int size = *sg_curr_size; |
|---|
| 2273 | | - struct page_frag *pfrag; |
|---|
| 2274 | | - struct scatterlist *sge; |
|---|
| 2275 | | - |
|---|
| 2276 | | - len -= size; |
|---|
| 2277 | | - pfrag = sk_page_frag(sk); |
|---|
| 2278 | | - |
|---|
| 2279 | | - while (len > 0) { |
|---|
| 2280 | | - unsigned int orig_offset; |
|---|
| 2281 | | - |
|---|
| 2282 | | - if (!sk_page_frag_refill(sk, pfrag)) { |
|---|
| 2283 | | - rc = -ENOMEM; |
|---|
| 2284 | | - goto out; |
|---|
| 2285 | | - } |
|---|
| 2286 | | - |
|---|
| 2287 | | - use = min_t(int, len, pfrag->size - pfrag->offset); |
|---|
| 2288 | | - |
|---|
| 2289 | | - if (!sk_wmem_schedule(sk, use)) { |
|---|
| 2290 | | - rc = -ENOMEM; |
|---|
| 2291 | | - goto out; |
|---|
| 2292 | | - } |
|---|
| 2293 | | - |
|---|
| 2294 | | - sk_mem_charge(sk, use); |
|---|
| 2295 | | - size += use; |
|---|
| 2296 | | - orig_offset = pfrag->offset; |
|---|
| 2297 | | - pfrag->offset += use; |
|---|
| 2298 | | - |
|---|
| 2299 | | - sge = sg + sg_curr - 1; |
|---|
| 2300 | | - if (sg_curr > first_coalesce && sg_page(sge) == pfrag->page && |
|---|
| 2301 | | - sge->offset + sge->length == orig_offset) { |
|---|
| 2302 | | - sge->length += use; |
|---|
| 2303 | | - } else { |
|---|
| 2304 | | - sge = sg + sg_curr; |
|---|
| 2305 | | - sg_unmark_end(sge); |
|---|
| 2306 | | - sg_set_page(sge, pfrag->page, use, orig_offset); |
|---|
| 2307 | | - get_page(pfrag->page); |
|---|
| 2308 | | - sg_curr++; |
|---|
| 2309 | | - |
|---|
| 2310 | | - if (sg_curr == MAX_SKB_FRAGS) |
|---|
| 2311 | | - sg_curr = 0; |
|---|
| 2312 | | - |
|---|
| 2313 | | - if (sg_curr == sg_start) { |
|---|
| 2314 | | - rc = -ENOSPC; |
|---|
| 2315 | | - break; |
|---|
| 2316 | | - } |
|---|
| 2317 | | - } |
|---|
| 2318 | | - |
|---|
| 2319 | | - len -= use; |
|---|
| 2320 | | - } |
|---|
| 2321 | | -out: |
|---|
| 2322 | | - *sg_curr_size = size; |
|---|
| 2323 | | - *sg_curr_index = sg_curr; |
|---|
| 2324 | | - return rc; |
|---|
| 2325 | | -} |
|---|
| 2326 | | -EXPORT_SYMBOL(sk_alloc_sg); |
|---|
| 2327 | 2530 | |
|---|
| 2328 | 2531 | static void __lock_sock(struct sock *sk) |
|---|
| 2329 | 2532 | __releases(&sk->sk_lock.slock) |
|---|
| .. | .. |
|---|
| 2358 | 2561 | next = skb->next; |
|---|
| 2359 | 2562 | prefetch(next); |
|---|
| 2360 | 2563 | WARN_ON_ONCE(skb_dst_is_noref(skb)); |
|---|
| 2361 | | - skb->next = NULL; |
|---|
| 2564 | + skb_mark_not_on_list(skb); |
|---|
| 2362 | 2565 | sk_backlog_rcv(sk, skb); |
|---|
| 2363 | 2566 | |
|---|
| 2364 | 2567 | cond_resched(); |
|---|
| .. | .. |
|---|
| 2530 | 2733 | if (mem_cgroup_sockets_enabled && sk->sk_memcg) |
|---|
| 2531 | 2734 | mem_cgroup_uncharge_skmem(sk->sk_memcg, amount); |
|---|
| 2532 | 2735 | |
|---|
| 2533 | | - if (sk_under_memory_pressure(sk) && |
|---|
| 2736 | + if (sk_under_global_memory_pressure(sk) && |
|---|
| 2534 | 2737 | (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0))) |
|---|
| 2535 | 2738 | sk_leave_memory_pressure(sk); |
|---|
| 2536 | 2739 | } |
|---|
| .. | .. |
|---|
| 2551 | 2754 | |
|---|
| 2552 | 2755 | int sk_set_peek_off(struct sock *sk, int val) |
|---|
| 2553 | 2756 | { |
|---|
| 2554 | | - sk->sk_peek_off = val; |
|---|
| 2757 | + WRITE_ONCE(sk->sk_peek_off, val); |
|---|
| 2555 | 2758 | return 0; |
|---|
| 2556 | 2759 | } |
|---|
| 2557 | 2760 | EXPORT_SYMBOL_GPL(sk_set_peek_off); |
|---|
| .. | .. |
|---|
| 2613 | 2816 | return -EOPNOTSUPP; |
|---|
| 2614 | 2817 | } |
|---|
| 2615 | 2818 | EXPORT_SYMBOL(sock_no_shutdown); |
|---|
| 2616 | | - |
|---|
| 2617 | | -int sock_no_setsockopt(struct socket *sock, int level, int optname, |
|---|
| 2618 | | - char __user *optval, unsigned int optlen) |
|---|
| 2619 | | -{ |
|---|
| 2620 | | - return -EOPNOTSUPP; |
|---|
| 2621 | | -} |
|---|
| 2622 | | -EXPORT_SYMBOL(sock_no_setsockopt); |
|---|
| 2623 | | - |
|---|
| 2624 | | -int sock_no_getsockopt(struct socket *sock, int level, int optname, |
|---|
| 2625 | | - char __user *optval, int __user *optlen) |
|---|
| 2626 | | -{ |
|---|
| 2627 | | - return -EOPNOTSUPP; |
|---|
| 2628 | | -} |
|---|
| 2629 | | -EXPORT_SYMBOL(sock_no_getsockopt); |
|---|
| 2630 | 2819 | |
|---|
| 2631 | 2820 | int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len) |
|---|
| 2632 | 2821 | { |
|---|
| .. | .. |
|---|
| 2732 | 2921 | rcu_read_unlock(); |
|---|
| 2733 | 2922 | } |
|---|
| 2734 | 2923 | |
|---|
| 2735 | | -static void sock_def_readable(struct sock *sk) |
|---|
| 2924 | +void sock_def_readable(struct sock *sk) |
|---|
| 2736 | 2925 | { |
|---|
| 2737 | 2926 | struct socket_wq *wq; |
|---|
| 2738 | 2927 | |
|---|
| 2739 | 2928 | rcu_read_lock(); |
|---|
| 2740 | 2929 | wq = rcu_dereference(sk->sk_wq); |
|---|
| 2741 | | - if (skwq_has_sleeper(wq)) |
|---|
| 2930 | + |
|---|
| 2931 | + if (skwq_has_sleeper(wq)) { |
|---|
| 2932 | + int done = 0; |
|---|
| 2933 | + |
|---|
| 2934 | + trace_android_vh_do_wake_up_sync(&wq->wait, &done); |
|---|
| 2935 | + if (done) |
|---|
| 2936 | + goto out; |
|---|
| 2937 | + |
|---|
| 2742 | 2938 | wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI | |
|---|
| 2743 | 2939 | EPOLLRDNORM | EPOLLRDBAND); |
|---|
| 2940 | + } |
|---|
| 2941 | + |
|---|
| 2942 | +out: |
|---|
| 2744 | 2943 | sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); |
|---|
| 2745 | 2944 | rcu_read_unlock(); |
|---|
| 2746 | 2945 | } |
|---|
| .. | .. |
|---|
| 2754 | 2953 | /* Do not wake up a writer until he can make "significant" |
|---|
| 2755 | 2954 | * progress. --DaveM |
|---|
| 2756 | 2955 | */ |
|---|
| 2757 | | - if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { |
|---|
| 2956 | + if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= READ_ONCE(sk->sk_sndbuf)) { |
|---|
| 2758 | 2957 | wq = rcu_dereference(sk->sk_wq); |
|---|
| 2759 | 2958 | if (skwq_has_sleeper(wq)) |
|---|
| 2760 | 2959 | wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT | |
|---|
| .. | .. |
|---|
| 2795 | 2994 | } |
|---|
| 2796 | 2995 | EXPORT_SYMBOL(sk_stop_timer); |
|---|
| 2797 | 2996 | |
|---|
| 2798 | | -void sock_init_data(struct socket *sock, struct sock *sk) |
|---|
| 2997 | +void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer) |
|---|
| 2998 | +{ |
|---|
| 2999 | + if (del_timer_sync(timer)) |
|---|
| 3000 | + __sock_put(sk); |
|---|
| 3001 | +} |
|---|
| 3002 | +EXPORT_SYMBOL(sk_stop_timer_sync); |
|---|
| 3003 | + |
|---|
| 3004 | +void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid) |
|---|
| 2799 | 3005 | { |
|---|
| 2800 | 3006 | sk_init_common(sk); |
|---|
| 2801 | 3007 | sk->sk_send_head = NULL; |
|---|
| .. | .. |
|---|
| 2803 | 3009 | timer_setup(&sk->sk_timer, NULL, 0); |
|---|
| 2804 | 3010 | |
|---|
| 2805 | 3011 | sk->sk_allocation = GFP_KERNEL; |
|---|
| 2806 | | - sk->sk_rcvbuf = sysctl_rmem_default; |
|---|
| 2807 | | - sk->sk_sndbuf = sysctl_wmem_default; |
|---|
| 3012 | + sk->sk_rcvbuf = READ_ONCE(sysctl_rmem_default); |
|---|
| 3013 | + sk->sk_sndbuf = READ_ONCE(sysctl_wmem_default); |
|---|
| 2808 | 3014 | sk->sk_state = TCP_CLOSE; |
|---|
| 2809 | 3015 | sk_set_socket(sk, sock); |
|---|
| 2810 | 3016 | |
|---|
| .. | .. |
|---|
| 2812 | 3018 | |
|---|
| 2813 | 3019 | if (sock) { |
|---|
| 2814 | 3020 | sk->sk_type = sock->type; |
|---|
| 2815 | | - sk->sk_wq = sock->wq; |
|---|
| 3021 | + RCU_INIT_POINTER(sk->sk_wq, &sock->wq); |
|---|
| 2816 | 3022 | sock->sk = sk; |
|---|
| 2817 | | - sk->sk_uid = SOCK_INODE(sock)->i_uid; |
|---|
| 2818 | 3023 | } else { |
|---|
| 2819 | | - sk->sk_wq = NULL; |
|---|
| 2820 | | - sk->sk_uid = make_kuid(sock_net(sk)->user_ns, 0); |
|---|
| 3024 | + RCU_INIT_POINTER(sk->sk_wq, NULL); |
|---|
| 2821 | 3025 | } |
|---|
| 3026 | + sk->sk_uid = uid; |
|---|
| 2822 | 3027 | |
|---|
| 2823 | 3028 | rwlock_init(&sk->sk_callback_lock); |
|---|
| 2824 | 3029 | if (sk->sk_kern_sock) |
|---|
| .. | .. |
|---|
| 2859 | 3064 | |
|---|
| 2860 | 3065 | #ifdef CONFIG_NET_RX_BUSY_POLL |
|---|
| 2861 | 3066 | sk->sk_napi_id = 0; |
|---|
| 2862 | | - sk->sk_ll_usec = sysctl_net_busy_read; |
|---|
| 3067 | + sk->sk_ll_usec = READ_ONCE(sysctl_net_busy_read); |
|---|
| 2863 | 3068 | #endif |
|---|
| 2864 | 3069 | |
|---|
| 2865 | | - sk->sk_max_pacing_rate = ~0U; |
|---|
| 2866 | | - sk->sk_pacing_rate = ~0U; |
|---|
| 2867 | | - sk->sk_pacing_shift = 10; |
|---|
| 3070 | + sk->sk_max_pacing_rate = ~0UL; |
|---|
| 3071 | + sk->sk_pacing_rate = ~0UL; |
|---|
| 3072 | + WRITE_ONCE(sk->sk_pacing_shift, 10); |
|---|
| 2868 | 3073 | sk->sk_incoming_cpu = -1; |
|---|
| 2869 | 3074 | |
|---|
| 2870 | 3075 | sk_rx_queue_clear(sk); |
|---|
| 2871 | 3076 | /* |
|---|
| 2872 | 3077 | * Before updating sk_refcnt, we must commit prior changes to memory |
|---|
| 2873 | | - * (Documentation/RCU/rculist_nulls.txt for details) |
|---|
| 3078 | + * (Documentation/RCU/rculist_nulls.rst for details) |
|---|
| 2874 | 3079 | */ |
|---|
| 2875 | 3080 | smp_wmb(); |
|---|
| 2876 | 3081 | refcount_set(&sk->sk_refcnt, 1); |
|---|
| 2877 | 3082 | atomic_set(&sk->sk_drops, 0); |
|---|
| 3083 | +} |
|---|
| 3084 | +EXPORT_SYMBOL(sock_init_data_uid); |
|---|
| 3085 | + |
|---|
| 3086 | +void sock_init_data(struct socket *sock, struct sock *sk) |
|---|
| 3087 | +{ |
|---|
| 3088 | + kuid_t uid = sock ? |
|---|
| 3089 | + SOCK_INODE(sock)->i_uid : |
|---|
| 3090 | + make_kuid(sock_net(sk)->user_ns, 0); |
|---|
| 3091 | + |
|---|
| 3092 | + sock_init_data_uid(sock, sk, uid); |
|---|
| 2878 | 3093 | } |
|---|
| 2879 | 3094 | EXPORT_SYMBOL(sock_init_data); |
|---|
| 2880 | 3095 | |
|---|
| .. | .. |
|---|
| 2949 | 3164 | } |
|---|
| 2950 | 3165 | EXPORT_SYMBOL(lock_sock_fast); |
|---|
| 2951 | 3166 | |
|---|
| 2952 | | -int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) |
|---|
| 3167 | +int sock_gettstamp(struct socket *sock, void __user *userstamp, |
|---|
| 3168 | + bool timeval, bool time32) |
|---|
| 2953 | 3169 | { |
|---|
| 2954 | | - struct timeval tv; |
|---|
| 3170 | + struct sock *sk = sock->sk; |
|---|
| 3171 | + struct timespec64 ts; |
|---|
| 2955 | 3172 | |
|---|
| 2956 | 3173 | sock_enable_timestamp(sk, SOCK_TIMESTAMP); |
|---|
| 2957 | | - tv = ktime_to_timeval(sock_read_timestamp(sk)); |
|---|
| 2958 | | - if (tv.tv_sec == -1) |
|---|
| 2959 | | - return -ENOENT; |
|---|
| 2960 | | - if (tv.tv_sec == 0) { |
|---|
| 2961 | | - ktime_t kt = ktime_get_real(); |
|---|
| 2962 | | - sock_write_timestamp(sk, kt); |
|---|
| 2963 | | - tv = ktime_to_timeval(kt); |
|---|
| 2964 | | - } |
|---|
| 2965 | | - return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0; |
|---|
| 2966 | | -} |
|---|
| 2967 | | -EXPORT_SYMBOL(sock_get_timestamp); |
|---|
| 2968 | | - |
|---|
| 2969 | | -int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp) |
|---|
| 2970 | | -{ |
|---|
| 2971 | | - struct timespec ts; |
|---|
| 2972 | | - |
|---|
| 2973 | | - sock_enable_timestamp(sk, SOCK_TIMESTAMP); |
|---|
| 2974 | | - ts = ktime_to_timespec(sock_read_timestamp(sk)); |
|---|
| 3174 | + ts = ktime_to_timespec64(sock_read_timestamp(sk)); |
|---|
| 2975 | 3175 | if (ts.tv_sec == -1) |
|---|
| 2976 | 3176 | return -ENOENT; |
|---|
| 2977 | 3177 | if (ts.tv_sec == 0) { |
|---|
| 2978 | 3178 | ktime_t kt = ktime_get_real(); |
|---|
| 2979 | 3179 | sock_write_timestamp(sk, kt); |
|---|
| 2980 | | - ts = ktime_to_timespec(sk->sk_stamp); |
|---|
| 3180 | + ts = ktime_to_timespec64(kt); |
|---|
| 2981 | 3181 | } |
|---|
| 2982 | | - return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0; |
|---|
| 2983 | | -} |
|---|
| 2984 | | -EXPORT_SYMBOL(sock_get_timestampns); |
|---|
| 2985 | 3182 | |
|---|
| 2986 | | -void sock_enable_timestamp(struct sock *sk, int flag) |
|---|
| 3183 | + if (timeval) |
|---|
| 3184 | + ts.tv_nsec /= 1000; |
|---|
| 3185 | + |
|---|
| 3186 | +#ifdef CONFIG_COMPAT_32BIT_TIME |
|---|
| 3187 | + if (time32) |
|---|
| 3188 | + return put_old_timespec32(&ts, userstamp); |
|---|
| 3189 | +#endif |
|---|
| 3190 | +#ifdef CONFIG_SPARC64 |
|---|
| 3191 | + /* beware of padding in sparc64 timeval */ |
|---|
| 3192 | + if (timeval && !in_compat_syscall()) { |
|---|
| 3193 | + struct __kernel_old_timeval __user tv = { |
|---|
| 3194 | + .tv_sec = ts.tv_sec, |
|---|
| 3195 | + .tv_usec = ts.tv_nsec, |
|---|
| 3196 | + }; |
|---|
| 3197 | + if (copy_to_user(userstamp, &tv, sizeof(tv))) |
|---|
| 3198 | + return -EFAULT; |
|---|
| 3199 | + return 0; |
|---|
| 3200 | + } |
|---|
| 3201 | +#endif |
|---|
| 3202 | + return put_timespec64(&ts, userstamp); |
|---|
| 3203 | +} |
|---|
| 3204 | +EXPORT_SYMBOL(sock_gettstamp); |
|---|
| 3205 | + |
|---|
| 3206 | +void sock_enable_timestamp(struct sock *sk, enum sock_flags flag) |
|---|
| 2987 | 3207 | { |
|---|
| 2988 | 3208 | if (!sock_flag(sk, flag)) { |
|---|
| 2989 | 3209 | unsigned long previous_flags = sk->sk_flags; |
|---|
| .. | .. |
|---|
| 3052 | 3272 | } |
|---|
| 3053 | 3273 | EXPORT_SYMBOL(sock_common_getsockopt); |
|---|
| 3054 | 3274 | |
|---|
| 3055 | | -#ifdef CONFIG_COMPAT |
|---|
| 3056 | | -int compat_sock_common_getsockopt(struct socket *sock, int level, int optname, |
|---|
| 3057 | | - char __user *optval, int __user *optlen) |
|---|
| 3058 | | -{ |
|---|
| 3059 | | - struct sock *sk = sock->sk; |
|---|
| 3060 | | - |
|---|
| 3061 | | - if (sk->sk_prot->compat_getsockopt != NULL) |
|---|
| 3062 | | - return sk->sk_prot->compat_getsockopt(sk, level, optname, |
|---|
| 3063 | | - optval, optlen); |
|---|
| 3064 | | - return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen); |
|---|
| 3065 | | -} |
|---|
| 3066 | | -EXPORT_SYMBOL(compat_sock_common_getsockopt); |
|---|
| 3067 | | -#endif |
|---|
| 3068 | | - |
|---|
| 3069 | 3275 | int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, |
|---|
| 3070 | 3276 | int flags) |
|---|
| 3071 | 3277 | { |
|---|
| .. | .. |
|---|
| 3085 | 3291 | * Set socket options on an inet socket. |
|---|
| 3086 | 3292 | */ |
|---|
| 3087 | 3293 | int sock_common_setsockopt(struct socket *sock, int level, int optname, |
|---|
| 3088 | | - char __user *optval, unsigned int optlen) |
|---|
| 3294 | + sockptr_t optval, unsigned int optlen) |
|---|
| 3089 | 3295 | { |
|---|
| 3090 | 3296 | struct sock *sk = sock->sk; |
|---|
| 3091 | 3297 | |
|---|
| .. | .. |
|---|
| 3093 | 3299 | } |
|---|
| 3094 | 3300 | EXPORT_SYMBOL(sock_common_setsockopt); |
|---|
| 3095 | 3301 | |
|---|
| 3096 | | -#ifdef CONFIG_COMPAT |
|---|
| 3097 | | -int compat_sock_common_setsockopt(struct socket *sock, int level, int optname, |
|---|
| 3098 | | - char __user *optval, unsigned int optlen) |
|---|
| 3099 | | -{ |
|---|
| 3100 | | - struct sock *sk = sock->sk; |
|---|
| 3101 | | - |
|---|
| 3102 | | - if (sk->sk_prot->compat_setsockopt != NULL) |
|---|
| 3103 | | - return sk->sk_prot->compat_setsockopt(sk, level, optname, |
|---|
| 3104 | | - optval, optlen); |
|---|
| 3105 | | - return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen); |
|---|
| 3106 | | -} |
|---|
| 3107 | | -EXPORT_SYMBOL(compat_sock_common_setsockopt); |
|---|
| 3108 | | -#endif |
|---|
| 3109 | | - |
|---|
| 3110 | 3302 | void sk_common_release(struct sock *sk) |
|---|
| 3111 | 3303 | { |
|---|
| 3112 | 3304 | if (sk->sk_prot->destroy) |
|---|
| 3113 | 3305 | sk->sk_prot->destroy(sk); |
|---|
| 3114 | 3306 | |
|---|
| 3115 | 3307 | /* |
|---|
| 3116 | | - * Observation: when sock_common_release is called, processes have |
|---|
| 3308 | + * Observation: when sk_common_release is called, processes have |
|---|
| 3117 | 3309 | * no access to socket. But net still has. |
|---|
| 3118 | 3310 | * Step one, detach it from networking: |
|---|
| 3119 | 3311 | * |
|---|
| .. | .. |
|---|
| 3149 | 3341 | memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS); |
|---|
| 3150 | 3342 | |
|---|
| 3151 | 3343 | mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk); |
|---|
| 3152 | | - mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf; |
|---|
| 3344 | + mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf); |
|---|
| 3153 | 3345 | mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk); |
|---|
| 3154 | | - mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf; |
|---|
| 3346 | + mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf); |
|---|
| 3155 | 3347 | mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc; |
|---|
| 3156 | | - mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued; |
|---|
| 3348 | + mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued); |
|---|
| 3157 | 3349 | mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc); |
|---|
| 3158 | | - mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len; |
|---|
| 3350 | + mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len); |
|---|
| 3159 | 3351 | mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops); |
|---|
| 3160 | 3352 | } |
|---|
| 3161 | 3353 | |
|---|
| .. | .. |
|---|
| 3240 | 3432 | |
|---|
| 3241 | 3433 | core_initcall(net_inuse_init); |
|---|
| 3242 | 3434 | |
|---|
| 3243 | | -static void assign_proto_idx(struct proto *prot) |
|---|
| 3435 | +static int assign_proto_idx(struct proto *prot) |
|---|
| 3244 | 3436 | { |
|---|
| 3245 | 3437 | prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR); |
|---|
| 3246 | 3438 | |
|---|
| 3247 | 3439 | if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) { |
|---|
| 3248 | 3440 | pr_err("PROTO_INUSE_NR exhausted\n"); |
|---|
| 3249 | | - return; |
|---|
| 3441 | + return -ENOSPC; |
|---|
| 3250 | 3442 | } |
|---|
| 3251 | 3443 | |
|---|
| 3252 | 3444 | set_bit(prot->inuse_idx, proto_inuse_idx); |
|---|
| 3445 | + return 0; |
|---|
| 3253 | 3446 | } |
|---|
| 3254 | 3447 | |
|---|
| 3255 | 3448 | static void release_proto_idx(struct proto *prot) |
|---|
| .. | .. |
|---|
| 3258 | 3451 | clear_bit(prot->inuse_idx, proto_inuse_idx); |
|---|
| 3259 | 3452 | } |
|---|
| 3260 | 3453 | #else |
|---|
| 3261 | | -static inline void assign_proto_idx(struct proto *prot) |
|---|
| 3454 | +static inline int assign_proto_idx(struct proto *prot) |
|---|
| 3262 | 3455 | { |
|---|
| 3456 | + return 0; |
|---|
| 3263 | 3457 | } |
|---|
| 3264 | 3458 | |
|---|
| 3265 | 3459 | static inline void release_proto_idx(struct proto *prot) |
|---|
| .. | .. |
|---|
| 3270 | 3464 | { |
|---|
| 3271 | 3465 | } |
|---|
| 3272 | 3466 | #endif |
|---|
| 3467 | + |
|---|
| 3468 | +static void tw_prot_cleanup(struct timewait_sock_ops *twsk_prot) |
|---|
| 3469 | +{ |
|---|
| 3470 | + if (!twsk_prot) |
|---|
| 3471 | + return; |
|---|
| 3472 | + kfree(twsk_prot->twsk_slab_name); |
|---|
| 3473 | + twsk_prot->twsk_slab_name = NULL; |
|---|
| 3474 | + kmem_cache_destroy(twsk_prot->twsk_slab); |
|---|
| 3475 | + twsk_prot->twsk_slab = NULL; |
|---|
| 3476 | +} |
|---|
| 3273 | 3477 | |
|---|
| 3274 | 3478 | static void req_prot_cleanup(struct request_sock_ops *rsk_prot) |
|---|
| 3275 | 3479 | { |
|---|
| .. | .. |
|---|
| 3308 | 3512 | |
|---|
| 3309 | 3513 | int proto_register(struct proto *prot, int alloc_slab) |
|---|
| 3310 | 3514 | { |
|---|
| 3515 | + int ret = -ENOBUFS; |
|---|
| 3516 | + |
|---|
| 3311 | 3517 | if (alloc_slab) { |
|---|
| 3312 | 3518 | prot->slab = kmem_cache_create_usercopy(prot->name, |
|---|
| 3313 | 3519 | prot->obj_size, 0, |
|---|
| .. | .. |
|---|
| 3339 | 3545 | prot->slab_flags, |
|---|
| 3340 | 3546 | NULL); |
|---|
| 3341 | 3547 | if (prot->twsk_prot->twsk_slab == NULL) |
|---|
| 3342 | | - goto out_free_timewait_sock_slab_name; |
|---|
| 3548 | + goto out_free_timewait_sock_slab; |
|---|
| 3343 | 3549 | } |
|---|
| 3344 | 3550 | } |
|---|
| 3345 | 3551 | |
|---|
| 3346 | 3552 | mutex_lock(&proto_list_mutex); |
|---|
| 3553 | + ret = assign_proto_idx(prot); |
|---|
| 3554 | + if (ret) { |
|---|
| 3555 | + mutex_unlock(&proto_list_mutex); |
|---|
| 3556 | + goto out_free_timewait_sock_slab; |
|---|
| 3557 | + } |
|---|
| 3347 | 3558 | list_add(&prot->node, &proto_list); |
|---|
| 3348 | | - assign_proto_idx(prot); |
|---|
| 3349 | 3559 | mutex_unlock(&proto_list_mutex); |
|---|
| 3350 | | - return 0; |
|---|
| 3560 | + return ret; |
|---|
| 3351 | 3561 | |
|---|
| 3352 | | -out_free_timewait_sock_slab_name: |
|---|
| 3353 | | - kfree(prot->twsk_prot->twsk_slab_name); |
|---|
| 3562 | +out_free_timewait_sock_slab: |
|---|
| 3563 | + if (alloc_slab && prot->twsk_prot) |
|---|
| 3564 | + tw_prot_cleanup(prot->twsk_prot); |
|---|
| 3354 | 3565 | out_free_request_sock_slab: |
|---|
| 3355 | | - req_prot_cleanup(prot->rsk_prot); |
|---|
| 3566 | + if (alloc_slab) { |
|---|
| 3567 | + req_prot_cleanup(prot->rsk_prot); |
|---|
| 3356 | 3568 | |
|---|
| 3357 | | - kmem_cache_destroy(prot->slab); |
|---|
| 3358 | | - prot->slab = NULL; |
|---|
| 3569 | + kmem_cache_destroy(prot->slab); |
|---|
| 3570 | + prot->slab = NULL; |
|---|
| 3571 | + } |
|---|
| 3359 | 3572 | out: |
|---|
| 3360 | | - return -ENOBUFS; |
|---|
| 3573 | + return ret; |
|---|
| 3361 | 3574 | } |
|---|
| 3362 | 3575 | EXPORT_SYMBOL(proto_register); |
|---|
| 3363 | 3576 | |
|---|
| .. | .. |
|---|
| 3372 | 3585 | prot->slab = NULL; |
|---|
| 3373 | 3586 | |
|---|
| 3374 | 3587 | req_prot_cleanup(prot->rsk_prot); |
|---|
| 3375 | | - |
|---|
| 3376 | | - if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) { |
|---|
| 3377 | | - kmem_cache_destroy(prot->twsk_prot->twsk_slab); |
|---|
| 3378 | | - kfree(prot->twsk_prot->twsk_slab_name); |
|---|
| 3379 | | - prot->twsk_prot->twsk_slab = NULL; |
|---|
| 3380 | | - } |
|---|
| 3588 | + tw_prot_cleanup(prot->twsk_prot); |
|---|
| 3381 | 3589 | } |
|---|
| 3382 | 3590 | EXPORT_SYMBOL(proto_unregister); |
|---|
| 3383 | 3591 | |
|---|
| .. | .. |
|---|
| 3394 | 3602 | #ifdef CONFIG_INET |
|---|
| 3395 | 3603 | if (family == AF_INET && |
|---|
| 3396 | 3604 | protocol != IPPROTO_RAW && |
|---|
| 3605 | + protocol < MAX_INET_PROTOS && |
|---|
| 3397 | 3606 | !rcu_access_pointer(inet_protos[protocol])) |
|---|
| 3398 | 3607 | return -ENOENT; |
|---|
| 3399 | 3608 | #endif |
|---|
| .. | .. |
|---|
| 3431 | 3640 | return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L; |
|---|
| 3432 | 3641 | } |
|---|
| 3433 | 3642 | |
|---|
| 3434 | | -static char *sock_prot_memory_pressure(struct proto *proto) |
|---|
| 3643 | +static const char *sock_prot_memory_pressure(struct proto *proto) |
|---|
| 3435 | 3644 | { |
|---|
| 3436 | 3645 | return proto->memory_pressure != NULL ? |
|---|
| 3437 | 3646 | proto_memory_pressure(proto) ? "yes" : "no" : "NI"; |
|---|
| .. | .. |
|---|
| 3535 | 3744 | } |
|---|
| 3536 | 3745 | EXPORT_SYMBOL(sk_busy_loop_end); |
|---|
| 3537 | 3746 | #endif /* CONFIG_NET_RX_BUSY_POLL */ |
|---|
| 3747 | + |
|---|
| 3748 | +int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len) |
|---|
| 3749 | +{ |
|---|
| 3750 | + if (!sk->sk_prot->bind_add) |
|---|
| 3751 | + return -EOPNOTSUPP; |
|---|
| 3752 | + return sk->sk_prot->bind_add(sk, addr, addr_len); |
|---|
| 3753 | +} |
|---|
| 3754 | +EXPORT_SYMBOL(sock_bind_add); |
|---|