...
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
...
  *
  *		Generic socket support routines. Memory allocators, socket lock/release
  *		handler for protocols to use and generic option handler.
- *
  *
  * Authors:	Ross Biro
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
...
  *		Arnaldo C. Melo :	cleanups, use skb_queue_purge
  *
  *	To Fix:
- *
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
...
 #include <linux/static_key.h>
 #include <linux/memcontrol.h>
 #include <linux/prefetch.h>
+#include <linux/compat.h>
 
 #include <linux/uaccess.h>
 
...
 
 #include <linux/filter.h>
 #include <net/sock_reuseport.h>
+#include <net/bpf_sk_storage.h>
 
 #include <trace/events/sock.h>
+#include <trace/hooks/sched.h>
 
 #include <net/tcp.h>
 #include <net/busy_poll.h>
...
 }
 EXPORT_SYMBOL(__sk_backlog_rcv);
 
-static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
+static int sock_get_timeout(long timeo, void *optval, bool old_timeval)
 {
-	struct timeval tv;
+	struct __kernel_sock_timeval tv;
 
-	if (optlen < sizeof(tv))
-		return -EINVAL;
-	if (copy_from_user(&tv, optval, sizeof(tv)))
-		return -EFAULT;
+	if (timeo == MAX_SCHEDULE_TIMEOUT) {
+		tv.tv_sec = 0;
+		tv.tv_usec = 0;
+	} else {
+		tv.tv_sec = timeo / HZ;
+		tv.tv_usec = ((timeo % HZ) * USEC_PER_SEC) / HZ;
+	}
+
+	if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
+		struct old_timeval32 tv32 = { tv.tv_sec, tv.tv_usec };
+		*(struct old_timeval32 *)optval = tv32;
+		return sizeof(tv32);
+	}
+
+	if (old_timeval) {
+		struct __kernel_old_timeval old_tv;
+		old_tv.tv_sec = tv.tv_sec;
+		old_tv.tv_usec = tv.tv_usec;
+		*(struct __kernel_old_timeval *)optval = old_tv;
+		return sizeof(old_tv);
+	}
+
+	*(struct __kernel_sock_timeval *)optval = tv;
+	return sizeof(tv);
+}
+
+static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
+			    bool old_timeval)
+{
+	struct __kernel_sock_timeval tv;
+
+	if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
+		struct old_timeval32 tv32;
+
+		if (optlen < sizeof(tv32))
+			return -EINVAL;
+
+		if (copy_from_sockptr(&tv32, optval, sizeof(tv32)))
+			return -EFAULT;
+		tv.tv_sec = tv32.tv_sec;
+		tv.tv_usec = tv32.tv_usec;
+	} else if (old_timeval) {
+		struct __kernel_old_timeval old_tv;
+
+		if (optlen < sizeof(old_tv))
+			return -EINVAL;
+		if (copy_from_sockptr(&old_tv, optval, sizeof(old_tv)))
+			return -EFAULT;
+		tv.tv_sec = old_tv.tv_sec;
+		tv.tv_usec = old_tv.tv_usec;
+	} else {
+		if (optlen < sizeof(tv))
+			return -EINVAL;
+		if (copy_from_sockptr(&tv, optval, sizeof(tv)))
+			return -EFAULT;
+	}
 	if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
 		return -EDOM;
 
...
 	*timeo_p = MAX_SCHEDULE_TIMEOUT;
 	if (tv.tv_sec == 0 && tv.tv_usec == 0)
 		return 0;
-	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
-		*timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP(tv.tv_usec, USEC_PER_SEC / HZ);
+	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1))
+		*timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP((unsigned long)tv.tv_usec, USEC_PER_SEC / HZ);
 	return 0;
-}
-
-static void sock_warn_obsolete_bsdism(const char *name)
-{
-	static int warned;
-	static char warncomm[TASK_COMM_LEN];
-	if (strcmp(warncomm, current->comm) && warned < 5) {
-		strcpy(warncomm, current->comm);
-		pr_warn("process `%s' is using obsolete %s SO_BSDCOMPAT\n",
-			warncomm, name);
-		warned++;
-	}
 }
 
 static bool sock_needs_netstamp(const struct sock *sk)
...
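The rework above splits timeout handling into `sock_get_timeout()`/`sock_set_timeout()` and teaches them three layouts: compat `old_timeval32`, native `__kernel_old_timeval`, and the 64-bit `__kernel_sock_timeval`. For context, the classic userspace side that feeds `sock_set_timeout()` looks like this — a minimal sketch using the standard sockets API, not part of this patch:

```c
/* Minimal userspace sketch (standard sockets API, not from this patch):
 * set a 2.5 second receive timeout; sock_set_timeout() converts it to
 * jiffies on the kernel side.
 */
#include <sys/socket.h>
#include <sys/time.h>

static int set_rcv_timeout(int fd)
{
	struct timeval tv = { .tv_sec = 2, .tv_usec = 500000 };

	/* tv_usec outside [0, 1000000) is rejected with -EDOM */
	return setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
}
```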
...
 
 		rc = sk_backlog_rcv(sk, skb);
 
-		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
-	} else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
+		mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
+	} else if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) {
 		bh_unlock_sock(sk);
 		atomic_inc(&sk->sk_drops);
 		goto discard_and_relse;
...
 }
 EXPORT_SYMBOL(sk_dst_check);
 
-static int sock_setbindtodevice(struct sock *sk, char __user *optval,
-				int optlen)
+static int sock_bindtoindex_locked(struct sock *sk, int ifindex)
+{
+	int ret = -ENOPROTOOPT;
+#ifdef CONFIG_NETDEVICES
+	struct net *net = sock_net(sk);
+
+	/* Sorry... */
+	ret = -EPERM;
+	if (sk->sk_bound_dev_if && !ns_capable(net->user_ns, CAP_NET_RAW))
+		goto out;
+
+	ret = -EINVAL;
+	if (ifindex < 0)
+		goto out;
+
+	sk->sk_bound_dev_if = ifindex;
+	if (sk->sk_prot->rehash)
+		sk->sk_prot->rehash(sk);
+	sk_dst_reset(sk);
+
+	ret = 0;
+
+out:
+#endif
+
+	return ret;
+}
+
+int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk)
+{
+	int ret;
+
+	if (lock_sk)
+		lock_sock(sk);
+	ret = sock_bindtoindex_locked(sk, ifindex);
+	if (lock_sk)
+		release_sock(sk);
+
+	return ret;
+}
+EXPORT_SYMBOL(sock_bindtoindex);
+
+static int sock_setbindtodevice(struct sock *sk, sockptr_t optval, int optlen)
 {
 	int ret = -ENOPROTOOPT;
 #ifdef CONFIG_NETDEVICES
 	struct net *net = sock_net(sk);
 	char devname[IFNAMSIZ];
 	int index;
-
-	/* Sorry... */
-	ret = -EPERM;
-	if (!ns_capable(net->user_ns, CAP_NET_RAW))
-		goto out;
 
 	ret = -EINVAL;
 	if (optlen < 0)
...
 	memset(devname, 0, sizeof(devname));
 
 	ret = -EFAULT;
-	if (copy_from_user(devname, optval, optlen))
+	if (copy_from_sockptr(devname, optval, optlen))
 		goto out;
 
 	index = 0;
...
 		goto out;
 	}
 
-	lock_sock(sk);
-	sk->sk_bound_dev_if = index;
-	sk_dst_reset(sk);
-	release_sock(sk);
-
-	ret = 0;
-
+	return sock_bindtoindex(sk, index, true);
 out:
 #endif
 
...
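`sock_bindtoindex()` factors the bind-to-interface logic out of `sock_setbindtodevice()` so in-kernel callers can bind by ifindex directly. A hypothetical caller might look like the sketch below (the wrapper name is invented for illustration):

```c
/* Hypothetical in-kernel usage sketch (wrapper name invented): bind a
 * kernel-created socket to a given interface index, letting the helper
 * take the socket lock itself (lock_sk = true).
 */
static int bind_ksock_to_ifindex(struct socket *sock, int ifindex)
{
	/* Re-binding an already-bound socket requires CAP_NET_RAW in the
	 * socket's user namespace; a negative ifindex returns -EINVAL.
	 */
	return sock_bindtoindex(sock->sk, ifindex, true);
}
```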
 	return ret;
 }
 
-static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
-{
-	if (valbool)
-		sock_set_flag(sk, bit);
-	else
-		sock_reset_flag(sk, bit);
-}
-
 bool sk_mc_loop(struct sock *sk)
 {
 	if (dev_recursion_level())
 		return false;
 	if (!sk)
 		return true;
-	switch (sk->sk_family) {
+	/* IPV6_ADDRFORM can change sk->sk_family under us. */
+	switch (READ_ONCE(sk->sk_family)) {
 	case AF_INET:
 		return inet_sk(sk)->mc_loop;
 #if IS_ENABLED(CONFIG_IPV6)
...
 }
 EXPORT_SYMBOL(sk_mc_loop);
 
+void sock_set_reuseaddr(struct sock *sk)
+{
+	lock_sock(sk);
+	sk->sk_reuse = SK_CAN_REUSE;
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_reuseaddr);
+
+void sock_set_reuseport(struct sock *sk)
+{
+	lock_sock(sk);
+	sk->sk_reuseport = true;
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_reuseport);
+
+void sock_no_linger(struct sock *sk)
+{
+	lock_sock(sk);
+	sk->sk_lingertime = 0;
+	sock_set_flag(sk, SOCK_LINGER);
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_no_linger);
+
+void sock_set_priority(struct sock *sk, u32 priority)
+{
+	lock_sock(sk);
+	sk->sk_priority = priority;
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_priority);
+
+void sock_set_sndtimeo(struct sock *sk, s64 secs)
+{
+	lock_sock(sk);
+	if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1)
+		sk->sk_sndtimeo = secs * HZ;
+	else
+		sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_sndtimeo);
+
+static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns)
+{
+	if (val) {
+		sock_valbool_flag(sk, SOCK_TSTAMP_NEW, new);
+		sock_valbool_flag(sk, SOCK_RCVTSTAMPNS, ns);
+		sock_set_flag(sk, SOCK_RCVTSTAMP);
+		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
+	} else {
+		sock_reset_flag(sk, SOCK_RCVTSTAMP);
+		sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
+	}
+}
+
+void sock_enable_timestamps(struct sock *sk)
+{
+	lock_sock(sk);
+	__sock_set_timestamps(sk, true, false, true);
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_enable_timestamps);
+
+void sock_set_keepalive(struct sock *sk)
+{
+	lock_sock(sk);
+	if (sk->sk_prot->keepalive)
+		sk->sk_prot->keepalive(sk, true);
+	sock_valbool_flag(sk, SOCK_KEEPOPEN, true);
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_keepalive);
+
+static void __sock_set_rcvbuf(struct sock *sk, int val)
+{
+	/* Ensure val * 2 fits into an int, to prevent max_t() from treating it
+	 * as a negative value.
+	 */
+	val = min_t(int, val, INT_MAX / 2);
+	sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+
+	/* We double it on the way in to account for "struct sk_buff" etc.
+	 * overhead. Applications assume that the SO_RCVBUF setting they make
+	 * will allow that much actual data to be received on that socket.
+	 *
+	 * Applications are unaware that "struct sk_buff" and other overheads
+	 * allocate from the receive buffer during socket buffer allocation.
+	 *
+	 * And after considering the possible alternatives, returning the value
+	 * we actually used in getsockopt is the most desirable behavior.
+	 */
+	WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * 2, SOCK_MIN_RCVBUF));
+}
+
+void sock_set_rcvbuf(struct sock *sk, int val)
+{
+	lock_sock(sk);
+	__sock_set_rcvbuf(sk, val);
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_rcvbuf);
+
+static void __sock_set_mark(struct sock *sk, u32 val)
+{
+	if (val != sk->sk_mark) {
+		sk->sk_mark = val;
+		sk_dst_reset(sk);
+	}
+}
+
+void sock_set_mark(struct sock *sk, u32 val)
+{
+	lock_sock(sk);
+	__sock_set_mark(sk, val);
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_mark);
+
 /*
  *	This is meant for all protocols to use and covers goings on
  *	at the socket level. Everything here is generic.
  */
 
 int sock_setsockopt(struct socket *sock, int level, int optname,
-		    char __user *optval, unsigned int optlen)
+		    sockptr_t optval, unsigned int optlen)
 {
 	struct sock_txtime sk_txtime;
 	struct sock *sk = sock->sk;
...
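The exported `sock_set_reuseaddr()`, `sock_set_keepalive()`, `sock_set_rcvbuf()`, `sock_set_sndtimeo()` and friends above give kernel users typed setters instead of setsockopt-style calls with fake user pointers. A hypothetical adopter could look like this (sketch only; `tune_listener` is an invented name):

```c
/* Hypothetical adopter sketch (function name invented): replace
 * kernel_setsockopt() calls with the typed helpers exported above.
 */
static void tune_listener(struct sock *sk)
{
	sock_set_reuseaddr(sk);		/* SO_REUSEADDR */
	sock_set_keepalive(sk);		/* SO_KEEPALIVE */
	sock_set_rcvbuf(sk, 1 << 20);	/* SO_RCVBUF; doubled internally */
	sock_set_sndtimeo(sk, 5);	/* 5 second send timeout */
}
```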
 	if (optlen < sizeof(int))
 		return -EINVAL;
 
-	if (get_user(val, (int __user *)optval))
+	if (copy_from_sockptr(&val, optval, sizeof(val)))
 		return -EFAULT;
 
 	valbool = val ? 1 : 0;
...
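With the signature change above, `optval` is a `sockptr_t` that can wrap either a kernel or a user pointer, and `copy_from_sockptr()` dispatches accordingly. The sketch below illustrates the idea in deliberately simplified form; the real type and helpers live in include/linux/sockptr.h and differ in detail:

```c
/* Simplified sketch of the sockptr_t idea (assumption: the real
 * definition in include/linux/sockptr.h differs in detail): one tagged
 * pointer so the same setsockopt code serves user and kernel callers.
 */
typedef struct {
	union {
		void		*kernel;
		void __user	*user;
	};
	bool		is_kernel;
} sockptr_sketch_t;

static int copy_from_sockptr_sketch(void *dst, sockptr_sketch_t src,
				    size_t size)
{
	if (src.is_kernel) {
		memcpy(dst, src.kernel, size);
		return 0;
	}
	return copy_from_user(dst, src.user, size) ? -EFAULT : 0;
}
```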
 		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
 		 * are treated in BSD as hints
 		 */
-		val = min_t(u32, val, sysctl_wmem_max);
+		val = min_t(u32, val, READ_ONCE(sysctl_wmem_max));
 set_sndbuf:
+		/* Ensure val * 2 fits into an int, to prevent max_t()
+		 * from treating it as a negative value.
+		 */
+		val = min_t(int, val, INT_MAX / 2);
 		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
-		sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
+		WRITE_ONCE(sk->sk_sndbuf,
+			   max_t(int, val * 2, SOCK_MIN_SNDBUF));
 		/* Wake up sending tasks if we upped the value. */
 		sk->sk_write_space(sk);
 		break;
...
 			ret = -EPERM;
 			break;
 		}
+
+		/* No negative values (to prevent underflow, as val will be
+		 * multiplied by 2).
+		 */
+		if (val < 0)
+			val = 0;
 		goto set_sndbuf;
 
 	case SO_RCVBUF:
...
 		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
 		 * are treated in BSD as hints
 		 */
-		val = min_t(u32, val, sysctl_rmem_max);
-set_rcvbuf:
-		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
-		/*
-		 * We double it on the way in to account for
-		 * "struct sk_buff" etc. overhead. Applications
-		 * assume that the SO_RCVBUF setting they make will
-		 * allow that much actual data to be received on that
-		 * socket.
-		 *
-		 * Applications are unaware that "struct sk_buff" and
-		 * other overheads allocate from the receive buffer
-		 * during socket buffer allocation.
-		 *
-		 * And after considering the possible alternatives,
-		 * returning the value we actually used in getsockopt
-		 * is the most desirable behavior.
-		 */
-		sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
+		__sock_set_rcvbuf(sk, min_t(u32, val, READ_ONCE(sysctl_rmem_max)));
 		break;
 
 	case SO_RCVBUFFORCE:
...
 			ret = -EPERM;
 			break;
 		}
-		goto set_rcvbuf;
+
+		/* No negative values (to prevent underflow, as val will be
+		 * multiplied by 2).
+		 */
+		__sock_set_rcvbuf(sk, max(val, 0));
+		break;
 
 	case SO_KEEPALIVE:
 		if (sk->sk_prot->keepalive)
...
 			ret = -EINVAL;	/* 1003.1g */
 			break;
 		}
-		if (copy_from_user(&ling, optval, sizeof(ling))) {
+		if (copy_from_sockptr(&ling, optval, sizeof(ling))) {
 			ret = -EFAULT;
 			break;
 		}
...
 		break;
 
 	case SO_BSDCOMPAT:
-		sock_warn_obsolete_bsdism("setsockopt");
 		break;
 
 	case SO_PASSCRED:
...
 			clear_bit(SOCK_PASSCRED, &sock->flags);
 		break;
 
-	case SO_TIMESTAMP:
-	case SO_TIMESTAMPNS:
-		if (valbool) {
-			if (optname == SO_TIMESTAMP)
-				sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
-			else
-				sock_set_flag(sk, SOCK_RCVTSTAMPNS);
-			sock_set_flag(sk, SOCK_RCVTSTAMP);
-			sock_enable_timestamp(sk, SOCK_TIMESTAMP);
-		} else {
-			sock_reset_flag(sk, SOCK_RCVTSTAMP);
-			sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
-		}
+	case SO_TIMESTAMP_OLD:
+		__sock_set_timestamps(sk, valbool, false, false);
 		break;
-
-	case SO_TIMESTAMPING:
+	case SO_TIMESTAMP_NEW:
+		__sock_set_timestamps(sk, valbool, true, false);
+		break;
+	case SO_TIMESTAMPNS_OLD:
+		__sock_set_timestamps(sk, valbool, false, true);
+		break;
+	case SO_TIMESTAMPNS_NEW:
+		__sock_set_timestamps(sk, valbool, true, true);
+		break;
+	case SO_TIMESTAMPING_NEW:
+	case SO_TIMESTAMPING_OLD:
 		if (val & ~SOF_TIMESTAMPING_MASK) {
 			ret = -EINVAL;
 			break;
...
 		}
 
 		sk->sk_tsflags = val;
+		sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
+
 		if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
 			sock_enable_timestamp(sk,
 					      SOCK_TIMESTAMPING_RX_SOFTWARE);
...
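The `_OLD`/`_NEW` split keeps the pre-y2038 option values working while adding 64-bit-time variants; the `SOCK_TSTAMP_NEW` flag records which flavor was requested. From userspace the option is still requested the usual way (standard API sketch, not from this patch):

```c
/* Userspace sketch (standard API, not from this patch): ask for
 * nanosecond receive timestamps; the libc/kernel headers map
 * SO_TIMESTAMPNS to the _OLD or _NEW value appropriate for the ABI.
 */
#include <sys/socket.h>

static int enable_rx_timestamps(int fd)
{
	int on = 1;

	return setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS, &on, sizeof(on));
}
```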
 		if (sock->ops->set_rcvlowat)
 			ret = sock->ops->set_rcvlowat(sk, val);
 		else
-			sk->sk_rcvlowat = val ? : 1;
+			WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
 		break;
 
-	case SO_RCVTIMEO:
-		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
+	case SO_RCVTIMEO_OLD:
+	case SO_RCVTIMEO_NEW:
+		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval,
+				       optlen, optname == SO_RCVTIMEO_OLD);
 		break;
 
-	case SO_SNDTIMEO:
-		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
+	case SO_SNDTIMEO_OLD:
+	case SO_SNDTIMEO_NEW:
+		ret = sock_set_timeout(&sk->sk_sndtimeo, optval,
+				       optlen, optname == SO_SNDTIMEO_OLD);
 		break;
 
-	case SO_ATTACH_FILTER:
-		ret = -EINVAL;
-		if (optlen == sizeof(struct sock_fprog)) {
-			struct sock_fprog fprog;
+	case SO_ATTACH_FILTER: {
+		struct sock_fprog fprog;
 
-			ret = -EFAULT;
-			if (copy_from_user(&fprog, optval, sizeof(fprog)))
-				break;
-
+		ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
+		if (!ret)
 			ret = sk_attach_filter(&fprog, sk);
-		}
 		break;
-
+	}
 	case SO_ATTACH_BPF:
 		ret = -EINVAL;
 		if (optlen == sizeof(u32)) {
 			u32 ufd;
 
 			ret = -EFAULT;
-			if (copy_from_user(&ufd, optval, sizeof(ufd)))
+			if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
 				break;
 
 			ret = sk_attach_bpf(ufd, sk);
 		}
 		break;
 
-	case SO_ATTACH_REUSEPORT_CBPF:
-		ret = -EINVAL;
-		if (optlen == sizeof(struct sock_fprog)) {
-			struct sock_fprog fprog;
+	case SO_ATTACH_REUSEPORT_CBPF: {
+		struct sock_fprog fprog;
 
-			ret = -EFAULT;
-			if (copy_from_user(&fprog, optval, sizeof(fprog)))
-				break;
-
+		ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
+		if (!ret)
 			ret = sk_reuseport_attach_filter(&fprog, sk);
-		}
 		break;
-
+	}
 	case SO_ATTACH_REUSEPORT_EBPF:
 		ret = -EINVAL;
 		if (optlen == sizeof(u32)) {
 			u32 ufd;
 
 			ret = -EFAULT;
-			if (copy_from_user(&ufd, optval, sizeof(ufd)))
+			if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
 				break;
 
 			ret = sk_reuseport_attach_bpf(ufd, sk);
 		}
+		break;
+
+	case SO_DETACH_REUSEPORT_BPF:
+		ret = reuseport_detach_prog(sk);
 		break;
 
 	case SO_DETACH_FILTER:
...
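SO_ATTACH_FILTER and SO_ATTACH_REUSEPORT_CBPF now share `copy_bpf_fprog_from_user()`, which also understands the compat `sock_fprog` layout. For context, the userspace side of a classic-BPF attach looks like this (standard API, not from this patch):

```c
/* Userspace sketch (standard cBPF API, not from this patch): attach a
 * trivial classic BPF program that accepts every packet.
 */
#include <linux/filter.h>
#include <sys/socket.h>

static int attach_accept_all(int fd)
{
	struct sock_filter code[] = {
		{ 0x06, 0, 0, 0xffffffff },	/* BPF_RET|BPF_K: accept */
	};
	struct sock_fprog prog = {
		.len	= sizeof(code) / sizeof(code[0]),
		.filter	= code,
	};

	return setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
			  &prog, sizeof(prog));
}
```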
 		clear_bit(SOCK_PASSSEC, &sock->flags);
 		break;
 	case SO_MARK:
-		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
 			ret = -EPERM;
-		else
-			sk->sk_mark = val;
+			break;
+		}
+
+		__sock_set_mark(sk, val);
 		break;
 
 	case SO_RXQ_OVFL:
...
 #endif
 
 	case SO_MAX_PACING_RATE:
-		if (val != ~0U)
+	{
+		unsigned long ulval = (val == ~0U) ? ~0UL : (unsigned int)val;
+
+		if (sizeof(ulval) != sizeof(val) &&
+		    optlen >= sizeof(ulval) &&
+		    copy_from_sockptr(&ulval, optval, sizeof(ulval))) {
+			ret = -EFAULT;
+			break;
+		}
+		if (ulval != ~0UL)
 			cmpxchg(&sk->sk_pacing_status,
 				SK_PACING_NONE,
 				SK_PACING_NEEDED);
-		sk->sk_max_pacing_rate = val;
-		sk->sk_pacing_rate = min(sk->sk_pacing_rate,
-					 sk->sk_max_pacing_rate);
+		/* Pairs with READ_ONCE() from sk_getsockopt() */
+		WRITE_ONCE(sk->sk_max_pacing_rate, ulval);
+		sk->sk_pacing_rate = min(sk->sk_pacing_rate, ulval);
 		break;
-
+	}
 	case SO_INCOMING_CPU:
 		WRITE_ONCE(sk->sk_incoming_cpu, val);
 		break;
...
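SO_MAX_PACING_RATE is widened from `u32` to `unsigned long`: on 64-bit kernels an 8-byte optval is accepted, with ~0 still meaning "unlimited". Userspace sketch (not from this patch):

```c
/* Userspace sketch (not from this patch): on a 64-bit kernel, an
 * 8-byte optval lets the pacing cap exceed the old u32 limit.
 */
#include <stdint.h>
#include <sys/socket.h>

static int cap_pacing_rate(int fd)
{
	uint64_t rate = 10000000000ULL / 8;	/* 10 Gbit/s in bytes/sec */

	return setsockopt(fd, SOL_SOCKET, SO_MAX_PACING_RATE,
			  &rate, sizeof(rate));
}
```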
 
 	case SO_ZEROCOPY:
 		if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
-			if (sk->sk_protocol != IPPROTO_TCP)
+			if (!((sk->sk_type == SOCK_STREAM &&
+			       sk->sk_protocol == IPPROTO_TCP) ||
+			      (sk->sk_type == SOCK_DGRAM &&
+			       sk->sk_protocol == IPPROTO_UDP)))
 				ret = -ENOTSUPP;
 		} else if (sk->sk_family != PF_RDS) {
 			ret = -ENOTSUPP;
...
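The relaxed SO_ZEROCOPY check admits SOCK_DGRAM/IPPROTO_UDP in addition to TCP. A userspace sketch of enabling it (standard MSG_ZEROCOPY flow, not from this patch):

```c
/* Userspace sketch (standard MSG_ZEROCOPY flow, not from this patch):
 * the relaxed check above lets UDP sockets opt in too.
 */
#include <sys/socket.h>

#ifndef SO_ZEROCOPY
#define SO_ZEROCOPY 60
#endif

static int enable_zerocopy(int udp_fd)
{
	int one = 1;

	/* Later sends may pass MSG_ZEROCOPY and reap completion
	 * notifications from the socket error queue.
	 */
	return setsockopt(udp_fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one));
}
```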
 		break;
 
 	case SO_TXTIME:
-		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
-			ret = -EPERM;
-		} else if (optlen != sizeof(struct sock_txtime)) {
+		if (optlen != sizeof(struct sock_txtime)) {
 			ret = -EINVAL;
-		} else if (copy_from_user(&sk_txtime, optval,
+			break;
+		} else if (copy_from_sockptr(&sk_txtime, optval,
 					  sizeof(struct sock_txtime))) {
 			ret = -EFAULT;
+			break;
 		} else if (sk_txtime.flags & ~SOF_TXTIME_FLAGS_MASK) {
 			ret = -EINVAL;
-		} else {
-			sock_valbool_flag(sk, SOCK_TXTIME, true);
-			sk->sk_clockid = sk_txtime.clockid;
-			sk->sk_txtime_deadline_mode =
-				!!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE);
-			sk->sk_txtime_report_errors =
-				!!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS);
+			break;
 		}
+		/* CLOCK_MONOTONIC is only used by sch_fq, and this packet
+		 * scheduler has enough safe guards.
+		 */
+		if (sk_txtime.clockid != CLOCK_MONOTONIC &&
+		    !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
+			ret = -EPERM;
+			break;
+		}
+		sock_valbool_flag(sk, SOCK_TXTIME, true);
+		sk->sk_clockid = sk_txtime.clockid;
+		sk->sk_txtime_deadline_mode =
+			!!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE);
+		sk->sk_txtime_report_errors =
+			!!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS);
+		break;
+
+	case SO_BINDTOIFINDEX:
+		ret = sock_bindtoindex_locked(sk, val);
 		break;
 
 	default:
...
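The reordered SO_TXTIME checks drop the blanket CAP_NET_ADMIN requirement when the clock is CLOCK_MONOTONIC (the only clock sch_fq consumes). Userspace sketch (standard SO_TXTIME API, not from this patch):

```c
/* Userspace sketch (standard SO_TXTIME API, not from this patch): an
 * unprivileged process can now request CLOCK_MONOTONIC-based transmit
 * times, e.g. for use with the fq packet scheduler.
 */
#include <linux/net_tstamp.h>
#include <sys/socket.h>
#include <time.h>

static int enable_txtime(int fd)
{
	struct sock_txtime cfg = {
		.clockid = CLOCK_MONOTONIC,	/* no CAP_NET_ADMIN needed */
		.flags	 = SOF_TXTIME_REPORT_ERRORS,
	};

	return setsockopt(fd, SOL_SOCKET, SO_TXTIME, &cfg, sizeof(cfg));
}
```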
 	union {
 		int val;
 		u64 val64;
+		unsigned long ulval;
 		struct linger ling;
-		struct timeval tm;
+		struct old_timeval32 tm32;
+		struct __kernel_old_timeval tm;
+		struct __kernel_sock_timeval stm;
 		struct sock_txtime txtime;
 	} v;
 
...
 		break;
 
 	case SO_SNDBUF:
-		v.val = sk->sk_sndbuf;
+		v.val = READ_ONCE(sk->sk_sndbuf);
 		break;
 
 	case SO_RCVBUF:
-		v.val = sk->sk_rcvbuf;
+		v.val = READ_ONCE(sk->sk_rcvbuf);
 		break;
 
 	case SO_REUSEADDR:
...
 		break;
 
 	case SO_BSDCOMPAT:
-		sock_warn_obsolete_bsdism("getsockopt");
 		break;
 
-	case SO_TIMESTAMP:
+	case SO_TIMESTAMP_OLD:
 		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
+				!sock_flag(sk, SOCK_TSTAMP_NEW) &&
 				!sock_flag(sk, SOCK_RCVTSTAMPNS);
 		break;
 
-	case SO_TIMESTAMPNS:
-		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
+	case SO_TIMESTAMPNS_OLD:
+		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && !sock_flag(sk, SOCK_TSTAMP_NEW);
 		break;
 
-	case SO_TIMESTAMPING:
+	case SO_TIMESTAMP_NEW:
+		v.val = sock_flag(sk, SOCK_RCVTSTAMP) && sock_flag(sk, SOCK_TSTAMP_NEW);
+		break;
+
+	case SO_TIMESTAMPNS_NEW:
+		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && sock_flag(sk, SOCK_TSTAMP_NEW);
+		break;
+
+	case SO_TIMESTAMPING_OLD:
 		v.val = sk->sk_tsflags;
 		break;
 
-	case SO_RCVTIMEO:
-		lv = sizeof(struct timeval);
-		if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
-			v.tm.tv_sec = 0;
-			v.tm.tv_usec = 0;
-		} else {
-			v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
-			v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * USEC_PER_SEC) / HZ;
-		}
+	case SO_RCVTIMEO_OLD:
+	case SO_RCVTIMEO_NEW:
+		lv = sock_get_timeout(sk->sk_rcvtimeo, &v, SO_RCVTIMEO_OLD == optname);
 		break;
 
-	case SO_SNDTIMEO:
-		lv = sizeof(struct timeval);
-		if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
-			v.tm.tv_sec = 0;
-			v.tm.tv_usec = 0;
-		} else {
-			v.tm.tv_sec = sk->sk_sndtimeo / HZ;
-			v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * USEC_PER_SEC) / HZ;
-		}
+	case SO_SNDTIMEO_OLD:
+	case SO_SNDTIMEO_NEW:
+		lv = sock_get_timeout(sk->sk_sndtimeo, &v, SO_SNDTIMEO_OLD == optname);
 		break;
 
 	case SO_RCVLOWAT:
-		v.val = sk->sk_rcvlowat;
+		v.val = READ_ONCE(sk->sk_rcvlowat);
 		break;
 
 	case SO_SNDLOWAT:
...
 		if (!sock->ops->set_peek_off)
 			return -EOPNOTSUPP;
 
-		v.val = sk->sk_peek_off;
+		v.val = READ_ONCE(sk->sk_peek_off);
 		break;
 	case SO_NOFCS:
 		v.val = sock_flag(sk, SOCK_NOFCS);
...
 
 #ifdef CONFIG_NET_RX_BUSY_POLL
 	case SO_BUSY_POLL:
-		v.val = sk->sk_ll_usec;
+		v.val = READ_ONCE(sk->sk_ll_usec);
 		break;
 #endif
 
 	case SO_MAX_PACING_RATE:
-		v.val = sk->sk_max_pacing_rate;
+		/* The READ_ONCE() pair with the WRITE_ONCE() in sk_setsockopt() */
+		if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) {
+			lv = sizeof(v.ulval);
+			v.ulval = READ_ONCE(sk->sk_max_pacing_rate);
+		} else {
+			/* 32bit version */
+			v.val = min_t(unsigned long, ~0U,
+				      READ_ONCE(sk->sk_max_pacing_rate));
+		}
 		break;
 
 	case SO_INCOMING_CPU:
...
 			SOF_TXTIME_REPORT_ERRORS : 0;
 		break;
 
+	case SO_BINDTOIFINDEX:
+		v.val = sk->sk_bound_dev_if;
+		break;
+
+	case SO_NETNS_COOKIE:
+		lv = sizeof(u64);
+		if (len != lv)
+			return -EINVAL;
+		v.val64 = atomic64_read(&sock_net(sk)->net_cookie);
+		break;
+
 	default:
 		/* We implement the SO_SNDLOWAT etc to not be settable
 		 * (1003.1g 7).
...
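SO_NETNS_COOKIE is a new read-only option exposing the per-netns cookie; note the strict length check in the case above. Userspace sketch (not from this patch; the fallback define is an assumption matching the asm-generic value):

```c
/* Userspace sketch (not from this patch): read the 64-bit netns cookie.
 * The fallback define is an assumption (asm-generic value).
 */
#include <stdint.h>
#include <sys/socket.h>

#ifndef SO_NETNS_COOKIE
#define SO_NETNS_COOKIE 71
#endif

static int get_netns_cookie(int fd, uint64_t *cookie)
{
	socklen_t len = sizeof(*cookie);

	/* the kernel requires len == sizeof(u64), else -EINVAL */
	return getsockopt(fd, SOL_SOCKET, SO_NETNS_COOKIE, cookie, &len);
}
```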
  */
 static void sock_copy(struct sock *nsk, const struct sock *osk)
 {
+	const struct proto *prot = READ_ONCE(osk->sk_prot);
 #ifdef CONFIG_SECURITY_NETWORK
 	void *sptr = nsk->sk_security;
 #endif
 	memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
 
 	memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
-	       osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
+	       prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
 
 #ifdef CONFIG_SECURITY_NETWORK
 	nsk->sk_security = sptr;
...
 
 	sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
 
+#ifdef CONFIG_BPF_SYSCALL
+	bpf_sk_storage_free(sk);
+#endif
+
 	if (atomic_read(&sk->sk_omem_alloc))
 		pr_debug("%s: optmem leakage (%d bytes) detected\n",
 			 __func__, atomic_read(&sk->sk_omem_alloc));
...
  */
 struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 {
-	struct sock *newsk;
+	struct proto *prot = READ_ONCE(sk->sk_prot);
+	struct sk_filter *filter;
 	bool is_charged = true;
+	struct sock *newsk;
 
-	newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
-	if (newsk != NULL) {
-		struct sk_filter *filter;
+	newsk = sk_prot_alloc(prot, priority, sk->sk_family);
+	if (!newsk)
+		goto out;
 
-		sock_copy(newsk, sk);
+	sock_copy(newsk, sk);
 
-		newsk->sk_prot_creator = sk->sk_prot;
+	newsk->sk_prot_creator = prot;
 
-	/* SANITY */
-	if (likely(newsk->sk_net_refcnt))
-		get_net(sock_net(newsk));
-	sk_node_init(&newsk->sk_node);
-	sock_lock_init(newsk);
-	bh_lock_sock(newsk);
-	newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
-	newsk->sk_backlog.len = 0;
-
-	atomic_set(&newsk->sk_rmem_alloc, 0);
-	/*
-	 * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
-	 */
-	refcount_set(&newsk->sk_wmem_alloc, 1);
-	atomic_set(&newsk->sk_omem_alloc, 0);
-	sk_init_common(newsk);
-
-	newsk->sk_dst_cache = NULL;
-	newsk->sk_dst_pending_confirm = 0;
-	newsk->sk_wmem_queued = 0;
-	newsk->sk_forward_alloc = 0;
-	atomic_set(&newsk->sk_drops, 0);
-	newsk->sk_send_head = NULL;
-	newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
-	atomic_set(&newsk->sk_zckey, 0);
-
-	sock_reset_flag(newsk, SOCK_DONE);
-
-	/* sk->sk_memcg will be populated at accept() time */
-	newsk->sk_memcg = NULL;
-
-	cgroup_sk_clone(&newsk->sk_cgrp_data);
-
-	rcu_read_lock();
-	filter = rcu_dereference(sk->sk_filter);
-	if (filter != NULL)
-		/* though it's an empty new sock, the charging may fail
-		 * if sysctl_optmem_max was changed between creation of
-		 * original socket and cloning
-		 */
-		is_charged = sk_filter_charge(newsk, filter);
-	RCU_INIT_POINTER(newsk->sk_filter, filter);
-	rcu_read_unlock();
-
-	if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
-		/* We need to make sure that we don't uncharge the new
-		 * socket if we couldn't charge it in the first place
-		 * as otherwise we uncharge the parent's filter.
-		 */
-		if (!is_charged)
-			RCU_INIT_POINTER(newsk->sk_filter, NULL);
-		sk_free_unlock_clone(newsk);
-		newsk = NULL;
-		goto out;
-	}
-	RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
-
-	newsk->sk_err	   = 0;
-	newsk->sk_err_soft = 0;
-	newsk->sk_priority = 0;
-	newsk->sk_incoming_cpu = raw_smp_processor_id();
-	atomic64_set(&newsk->sk_cookie, 0);
-	if (likely(newsk->sk_net_refcnt))
-		sock_inuse_add(sock_net(newsk), 1);
-
-	/*
-	 * Before updating sk_refcnt, we must commit prior changes to memory
-	 * (Documentation/RCU/rculist_nulls.txt for details)
-	 */
-	smp_wmb();
-	refcount_set(&newsk->sk_refcnt, 2);
-
-	/*
-	 * Increment the counter in the same struct proto as the master
-	 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
-	 * is the same as sk->sk_prot->socks, as this field was copied
-	 * with memcpy).
-	 *
-	 * This _changes_ the previous behaviour, where
-	 * tcp_create_openreq_child always was incrementing the
-	 * equivalent to tcp_prot->socks (inet_sock_nr), so this have
-	 * to be taken into account in all callers. -acme
-	 */
-	sk_refcnt_debug_inc(newsk);
-	sk_set_socket(newsk, NULL);
-	sk_tx_queue_clear(newsk);
-	newsk->sk_wq = NULL;
-
-	if (newsk->sk_prot->sockets_allocated)
-		sk_sockets_allocated_inc(newsk);
-
-	if (sock_needs_netstamp(sk) &&
-	    newsk->sk_flags & SK_FLAGS_TIMESTAMP)
-		net_enable_timestamp();
+	/* SANITY */
+	if (likely(newsk->sk_net_refcnt)) {
+		get_net(sock_net(newsk));
+		sock_inuse_add(sock_net(newsk), 1);
 	}
+	sk_node_init(&newsk->sk_node);
+	sock_lock_init(newsk);
+	bh_lock_sock(newsk);
+	newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
+	newsk->sk_backlog.len = 0;
+
+	atomic_set(&newsk->sk_rmem_alloc, 0);
+
+	/* sk_wmem_alloc set to one (see sk_free() and sock_wfree()) */
+	refcount_set(&newsk->sk_wmem_alloc, 1);
+
+	atomic_set(&newsk->sk_omem_alloc, 0);
+	sk_init_common(newsk);
+
+	newsk->sk_dst_cache = NULL;
+	newsk->sk_dst_pending_confirm = 0;
+	newsk->sk_wmem_queued = 0;
+	newsk->sk_forward_alloc = 0;
+	atomic_set(&newsk->sk_drops, 0);
+	newsk->sk_send_head = NULL;
+	newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
+	atomic_set(&newsk->sk_zckey, 0);
+
+	sock_reset_flag(newsk, SOCK_DONE);
+
+	/* sk->sk_memcg will be populated at accept() time */
+	newsk->sk_memcg = NULL;
+
+	cgroup_sk_clone(&newsk->sk_cgrp_data);
+
+	rcu_read_lock();
+	filter = rcu_dereference(sk->sk_filter);
+	if (filter != NULL)
+		/* though it's an empty new sock, the charging may fail
+		 * if sysctl_optmem_max was changed between creation of
+		 * original socket and cloning
+		 */
+		is_charged = sk_filter_charge(newsk, filter);
+	RCU_INIT_POINTER(newsk->sk_filter, filter);
+	rcu_read_unlock();
+
+	if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
+		/* We need to make sure that we don't uncharge the new
+		 * socket if we couldn't charge it in the first place
+		 * as otherwise we uncharge the parent's filter.
+		 */
+		if (!is_charged)
+			RCU_INIT_POINTER(newsk->sk_filter, NULL);
+		sk_free_unlock_clone(newsk);
+		newsk = NULL;
+		goto out;
+	}
+	RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
+
+	if (bpf_sk_storage_clone(sk, newsk)) {
+		sk_free_unlock_clone(newsk);
+		newsk = NULL;
+		goto out;
+	}
+
+	/* Clear sk_user_data if parent had the pointer tagged
+	 * as not suitable for copying when cloning.
+	 */
+	if (sk_user_data_is_nocopy(newsk))
+		newsk->sk_user_data = NULL;
+
+	newsk->sk_err	   = 0;
+	newsk->sk_err_soft = 0;
+	newsk->sk_priority = 0;
+	newsk->sk_incoming_cpu = raw_smp_processor_id();
+
+	/* Before updating sk_refcnt, we must commit prior changes to memory
+	 * (Documentation/RCU/rculist_nulls.rst for details)
+	 */
+	smp_wmb();
+	refcount_set(&newsk->sk_refcnt, 2);
+
+	/* Increment the counter in the same struct proto as the master
+	 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
+	 * is the same as sk->sk_prot->socks, as this field was copied
+	 * with memcpy).
+	 *
+	 * This _changes_ the previous behaviour, where
+	 * tcp_create_openreq_child always was incrementing the
+	 * equivalent to tcp_prot->socks (inet_sock_nr), so this have
+	 * to be taken into account in all callers. -acme
+	 */
+	sk_refcnt_debug_inc(newsk);
+	sk_set_socket(newsk, NULL);
+	sk_tx_queue_clear(newsk);
+	RCU_INIT_POINTER(newsk->sk_wq, NULL);
+
+	if (newsk->sk_prot->sockets_allocated)
+		sk_sockets_allocated_inc(newsk);
+
+	if (sock_needs_netstamp(sk) && newsk->sk_flags & SK_FLAGS_TIMESTAMP)
+		net_enable_timestamp();
 out:
 	return newsk;
 }
...
 {
 	u32 max_segs = 1;
 
-	sk_dst_set(sk, dst);
 	sk->sk_route_caps = dst->dev->features | sk->sk_route_forced_caps;
 	if (sk->sk_route_caps & NETIF_F_GSO)
 		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
...
 		}
 	}
 	sk->sk_gso_max_segs = max_segs;
+	sk_dst_set(sk, dst);
 }
 EXPORT_SYMBOL_GPL(sk_setup_caps);
 
...
1877 | 2110 | } |
---|
1878 | 2111 | EXPORT_SYMBOL(skb_set_owner_w); |
---|
1879 | 2112 | |
---|
| 2113 | +static bool can_skb_orphan_partial(const struct sk_buff *skb) |
---|
| 2114 | +{ |
---|
| 2115 | +#ifdef CONFIG_TLS_DEVICE |
---|
| 2116 | + /* Drivers depend on in-order delivery for crypto offload, |
---|
| 2117 | + * partial orphan breaks out-of-order-OK logic. |
---|
| 2118 | + */ |
---|
| 2119 | + if (skb->decrypted) |
---|
| 2120 | + return false; |
---|
| 2121 | +#endif |
---|
| 2122 | + return (skb->destructor == sock_wfree || |
---|
| 2123 | + (IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree)); |
---|
| 2124 | +} |
---|
| 2125 | + |
---|
1880 | 2126 | /* This helper is used by netem, as it can hold packets in its |
---|
1881 | 2127 | * delay queue. We want to allow the owner socket to send more |
---|
1882 | 2128 | * packets, as if they were already TX completed by a typical driver. |
---|
.. | .. |
---|
1888 | 2134 | if (skb_is_tcp_pure_ack(skb)) |
---|
1889 | 2135 | return; |
---|
1890 | 2136 | |
---|
1891 | | - if (skb->destructor == sock_wfree |
---|
1892 | | -#ifdef CONFIG_INET |
---|
1893 | | - || skb->destructor == tcp_wfree |
---|
1894 | | -#endif |
---|
1895 | | - ) { |
---|
1896 | | - struct sock *sk = skb->sk; |
---|
| 2137 | + if (can_skb_orphan_partial(skb) && skb_set_owner_sk_safe(skb, skb->sk)) |
---|
| 2138 | + return; |
---|
1897 | 2139 | |
---|
1898 | | - if (refcount_inc_not_zero(&sk->sk_refcnt)) { |
---|
1899 | | - WARN_ON(refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc)); |
---|
1900 | | - skb->destructor = sock_efree; |
---|
1901 | | - } |
---|
1902 | | - } else { |
---|
1903 | | - skb_orphan(skb); |
---|
1904 | | - } |
---|
| 2140 | + skb_orphan(skb); |
---|
1905 | 2141 | } |
---|
1906 | 2142 | EXPORT_SYMBOL(skb_orphan_partial); |
---|
1907 | 2143 | |
---|
.. | .. |
---|
1928 | 2164 | } |
---|
1929 | 2165 | EXPORT_SYMBOL(sock_efree); |
---|
1930 | 2166 | |
---|
| 2167 | +/* Buffer destructor for prefetch/receive path where reference count may |
---|
| 2168 | + * not be held, e.g. for listen sockets. |
---|
| 2169 | + */ |
---|
| 2170 | +#ifdef CONFIG_INET |
---|
| 2171 | +void sock_pfree(struct sk_buff *skb) |
---|
| 2172 | +{ |
---|
| 2173 | + if (sk_is_refcounted(skb->sk)) |
---|
| 2174 | + sock_gen_put(skb->sk); |
---|
| 2175 | +} |
---|
| 2176 | +EXPORT_SYMBOL(sock_pfree); |
---|
| 2177 | +#endif /* CONFIG_INET */ |
---|
| 2178 | + |
---|
1931 | 2179 | kuid_t sock_i_uid(struct sock *sk) |
---|
1932 | 2180 | { |
---|
1933 | 2181 | kuid_t uid; |
---|
.. | .. |
---|
1939 | 2187 | } |
---|
1940 | 2188 | EXPORT_SYMBOL(sock_i_uid); |
---|
1941 | 2189 | |
---|
| 2190 | +unsigned long __sock_i_ino(struct sock *sk) |
---|
| 2191 | +{ |
---|
| 2192 | + unsigned long ino; |
---|
| 2193 | + |
---|
| 2194 | + read_lock(&sk->sk_callback_lock); |
---|
| 2195 | + ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0; |
---|
| 2196 | + read_unlock(&sk->sk_callback_lock); |
---|
| 2197 | + return ino; |
---|
| 2198 | +} |
---|
| 2199 | +EXPORT_SYMBOL(__sock_i_ino); |
---|
| 2200 | + |
---|
1942 | 2201 | unsigned long sock_i_ino(struct sock *sk) |
---|
1943 | 2202 | { |
---|
1944 | 2203 | unsigned long ino; |
---|
1945 | 2204 | |
---|
1946 | | - read_lock_bh(&sk->sk_callback_lock); |
---|
1947 | | - ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0; |
---|
1948 | | - read_unlock_bh(&sk->sk_callback_lock); |
---|
| 2205 | + local_bh_disable(); |
---|
| 2206 | + ino = __sock_i_ino(sk); |
---|
| 2207 | + local_bh_enable(); |
---|
1949 | 2208 | return ino; |
---|
1950 | 2209 | } |
---|
1951 | 2210 | EXPORT_SYMBOL(sock_i_ino); |
---|
.. | .. |
---|
1956 | 2215 | struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, |
---|
1957 | 2216 | gfp_t priority) |
---|
1958 | 2217 | { |
---|
1959 | | - if (force || refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { |
---|
| 2218 | + if (force || |
---|
| 2219 | + refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf)) { |
---|
1960 | 2220 | struct sk_buff *skb = alloc_skb(size, priority); |
---|
| 2221 | + |
---|
1961 | 2222 | if (skb) { |
---|
1962 | 2223 | skb_set_owner_w(skb, sk); |
---|
1963 | 2224 | return skb; |
---|
.. | .. |
---|
1981 | 2242 | |
---|
1982 | 2243 | /* small safe race: SKB_TRUESIZE may differ from final skb->truesize */ |
---|
1983 | 2244 | if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) > |
---|
1984 | | - sysctl_optmem_max) |
---|
| 2245 | + READ_ONCE(sysctl_optmem_max)) |
---|
1985 | 2246 | return NULL; |
---|
1986 | 2247 | |
---|
1987 | 2248 | skb = alloc_skb(size, priority); |
---|
.. | .. |
---|
1999 | 2260 | */ |
---|
2000 | 2261 | void *sock_kmalloc(struct sock *sk, int size, gfp_t priority) |
---|
2001 | 2262 | { |
---|
2002 | | - if ((unsigned int)size <= sysctl_optmem_max && |
---|
2003 | | - atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) { |
---|
| 2263 | + int optmem_max = READ_ONCE(sysctl_optmem_max); |
---|
| 2264 | + |
---|
| 2265 | + if ((unsigned int)size <= optmem_max && |
---|
| 2266 | + atomic_read(&sk->sk_omem_alloc) + size < optmem_max) { |
---|
2004 | 2267 | void *mem; |
---|
2005 | 2268 | /* First do the add, to avoid the race if kmalloc |
---|
2006 | 2269 | * might sleep. |
---|
.. | .. |
---|
2025 | 2288 | if (WARN_ON_ONCE(!mem)) |
---|
2026 | 2289 | return; |
---|
2027 | 2290 | if (nullify) |
---|
2028 | | - kzfree(mem); |
---|
| 2291 | + kfree_sensitive(mem); |
---|
2029 | 2292 | else |
---|
2030 | 2293 | kfree(mem); |
---|
2031 | 2294 | atomic_sub(size, &sk->sk_omem_alloc); |
---|
.. | .. |
---|
2058 | 2321 | break; |
---|
2059 | 2322 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); |
---|
2060 | 2323 | prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); |
---|
2061 | | - if (refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) |
---|
| 2324 | + if (refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf)) |
---|
2062 | 2325 | break; |
---|
2063 | | - if (sk->sk_shutdown & SEND_SHUTDOWN) |
---|
| 2326 | + if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) |
---|
2064 | 2327 | break; |
---|
2065 | | - if (sk->sk_err) |
---|
| 2328 | + if (READ_ONCE(sk->sk_err)) |
---|
2066 | 2329 | break; |
---|
2067 | 2330 | timeo = schedule_timeout(timeo); |
---|
2068 | 2331 | } |
---|
.. | .. |
---|
2090 | 2353 | goto failure; |
---|
2091 | 2354 | |
---|
2092 | 2355 | err = -EPIPE; |
---|
2093 | | - if (sk->sk_shutdown & SEND_SHUTDOWN) |
---|
| 2356 | + if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) |
---|
2094 | 2357 | goto failure; |
---|
2095 | 2358 | |
---|
2096 | | - if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf) |
---|
| 2359 | + if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf)) |
---|
2097 | 2360 | break; |
---|
2098 | 2361 | |
---|
2099 | 2362 | sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); |
---|
.. | .. |
---|
2139 | 2402 | return -EINVAL; |
---|
2140 | 2403 | sockc->mark = *(u32 *)CMSG_DATA(cmsg); |
---|
2141 | 2404 | break; |
---|
2142 | | - case SO_TIMESTAMPING: |
---|
| 2405 | + case SO_TIMESTAMPING_OLD: |
---|
2143 | 2406 | if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) |
---|
2144 | 2407 | return -EINVAL; |
---|
2145 | 2408 | |
---|
.. | .. |
---|
2207 | 2470 | } |
---|
2208 | 2471 | } |
---|
2209 | 2472 | |
---|
2210 | | -/* On 32bit arches, an skb frag is limited to 2^15 */ |
---|
2211 | 2473 | #define SKB_FRAG_PAGE_ORDER get_order(32768) |
---|
| 2474 | +DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key); |
---|
2212 | 2475 | |
---|
2213 | 2476 | /** |
---|
2214 | 2477 | * skb_page_frag_refill - check that a page_frag contains enough room |
---|
.. | .. |
---|
2233 | 2496 | } |
---|
2234 | 2497 | |
---|
2235 | 2498 | pfrag->offset = 0; |
---|
2236 | | - if (SKB_FRAG_PAGE_ORDER) { |
---|
| 2499 | + if (SKB_FRAG_PAGE_ORDER && |
---|
| 2500 | + !static_branch_unlikely(&net_high_order_alloc_disable_key)) { |
---|
2237 | 2501 | /* Avoid direct reclaim but allow kswapd to wake */ |
---|
2238 | 2502 | pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) | |
---|
2239 | 2503 | __GFP_COMP | __GFP_NOWARN | |
---|
.. | .. |
---|
2263 | 2527 | return false; |
---|
2264 | 2528 | } |
---|
2265 | 2529 | EXPORT_SYMBOL(sk_page_frag_refill); |
---|
2266 | | - |
---|
2267 | | -int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg, |
---|
2268 | | - int sg_start, int *sg_curr_index, unsigned int *sg_curr_size, |
---|
2269 | | - int first_coalesce) |
---|
2270 | | -{ |
---|
2271 | | - int sg_curr = *sg_curr_index, use = 0, rc = 0; |
---|
2272 | | - unsigned int size = *sg_curr_size; |
---|
2273 | | - struct page_frag *pfrag; |
---|
2274 | | - struct scatterlist *sge; |
---|
2275 | | - |
---|
2276 | | - len -= size; |
---|
2277 | | - pfrag = sk_page_frag(sk); |
---|
2278 | | - |
---|
2279 | | - while (len > 0) { |
---|
2280 | | - unsigned int orig_offset; |
---|
2281 | | - |
---|
2282 | | - if (!sk_page_frag_refill(sk, pfrag)) { |
---|
2283 | | - rc = -ENOMEM; |
---|
2284 | | - goto out; |
---|
2285 | | - } |
---|
2286 | | - |
---|
2287 | | - use = min_t(int, len, pfrag->size - pfrag->offset); |
---|
2288 | | - |
---|
2289 | | - if (!sk_wmem_schedule(sk, use)) { |
---|
2290 | | - rc = -ENOMEM; |
---|
2291 | | - goto out; |
---|
2292 | | - } |
---|
2293 | | - |
---|
2294 | | - sk_mem_charge(sk, use); |
---|
2295 | | - size += use; |
---|
2296 | | - orig_offset = pfrag->offset; |
---|
2297 | | - pfrag->offset += use; |
---|
2298 | | - |
---|
2299 | | - sge = sg + sg_curr - 1; |
---|
2300 | | - if (sg_curr > first_coalesce && sg_page(sge) == pfrag->page && |
---|
2301 | | - sge->offset + sge->length == orig_offset) { |
---|
2302 | | - sge->length += use; |
---|
2303 | | - } else { |
---|
2304 | | - sge = sg + sg_curr; |
---|
2305 | | - sg_unmark_end(sge); |
---|
2306 | | - sg_set_page(sge, pfrag->page, use, orig_offset); |
---|
2307 | | - get_page(pfrag->page); |
---|
2308 | | - sg_curr++; |
---|
2309 | | - |
---|
2310 | | - if (sg_curr == MAX_SKB_FRAGS) |
---|
2311 | | - sg_curr = 0; |
---|
2312 | | - |
---|
2313 | | - if (sg_curr == sg_start) { |
---|
2314 | | - rc = -ENOSPC; |
---|
2315 | | - break; |
---|
2316 | | - } |
---|
2317 | | - } |
---|
2318 | | - |
---|
2319 | | - len -= use; |
---|
2320 | | - } |
---|
2321 | | -out: |
---|
2322 | | - *sg_curr_size = size; |
---|
2323 | | - *sg_curr_index = sg_curr; |
---|
2324 | | - return rc; |
---|
2325 | | -} |
---|
2326 | | -EXPORT_SYMBOL(sk_alloc_sg); |
---|
2327 | 2530 | |
---|
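
sk_alloc_sg() is dropped from this file, but sk_page_frag_refill() remains the building block for send paths that copy into per-socket page fragments. Roughly how a sendmsg() implementation consumes it (the function is hypothetical, and a real caller would also charge the bytes with sk_wmem_schedule(), as the deleted helper did):

    static int copy_into_frag(struct sock *sk, struct iov_iter *from, int len)
    {
            struct page_frag *pfrag = sk_page_frag(sk);

            if (!sk_page_frag_refill(sk, pfrag))
                    return -ENOMEM;         /* socket enters memory pressure */

            len = min_t(int, len, pfrag->size - pfrag->offset);
            if (copy_page_from_iter(pfrag->page, pfrag->offset, len, from) != len)
                    return -EFAULT;

            pfrag->offset += len;           /* consume the space we filled */
            return len;
    }
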
2328 | 2531 | static void __lock_sock(struct sock *sk) |
---|
2329 | 2532 | __releases(&sk->sk_lock.slock) |
---|
.. | .. |
---|
2358 | 2561 | next = skb->next; |
---|
2359 | 2562 | prefetch(next); |
---|
2360 | 2563 | WARN_ON_ONCE(skb_dst_is_noref(skb)); |
---|
2361 | | - skb->next = NULL; |
---|
| 2564 | + skb_mark_not_on_list(skb); |
---|
2362 | 2565 | sk_backlog_rcv(sk, skb); |
---|
2363 | 2566 | |
---|
2364 | 2567 | cond_resched(); |
---|
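
skb_mark_not_on_list() is a readability helper with exactly the effect of the line it replaces; in include/linux/skbuff.h it is essentially:

    static inline void skb_mark_not_on_list(struct sk_buff *skb)
    {
            skb->next = NULL;
    }

Detaching the skb before sk_backlog_rcv() matters because the receive handler may free or requeue it, and a stale ->next would corrupt the walk that __release_sock() is performing.
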
.. | .. |
---|
2530 | 2733 | if (mem_cgroup_sockets_enabled && sk->sk_memcg) |
---|
2531 | 2734 | mem_cgroup_uncharge_skmem(sk->sk_memcg, amount); |
---|
2532 | 2735 | |
---|
2533 | | - if (sk_under_memory_pressure(sk) && |
---|
| 2736 | + if (sk_under_global_memory_pressure(sk) && |
---|
2534 | 2737 | (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0))) |
---|
2535 | 2738 | sk_leave_memory_pressure(sk); |
---|
2536 | 2739 | } |
---|
.. | .. |
---|
2551 | 2754 | |
---|
2552 | 2755 | int sk_set_peek_off(struct sock *sk, int val) |
---|
2553 | 2756 | { |
---|
2554 | | - sk->sk_peek_off = val; |
---|
| 2757 | + WRITE_ONCE(sk->sk_peek_off, val); |
---|
2555 | 2758 | return 0; |
---|
2556 | 2759 | } |
---|
2557 | 2760 | EXPORT_SYMBOL_GPL(sk_set_peek_off); |
---|
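
The WRITE_ONCE() pairs with readers of sk_peek_off that run without the socket lock (for example the getsockopt() side). From userspace the feature looks like this, assuming a protocol that implements set_peek_off, such as datagram AF_UNIX:

    #include <sys/socket.h>

    static void peek_twice(int fd)
    {
            int off = 0;    /* >= 0 enables peek-with-offset */
            char buf[64];

            setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off));
            recv(fd, buf, sizeof(buf), MSG_PEEK);   /* peeks from offset 0 */
            recv(fd, buf, sizeof(buf), MSG_PEEK);   /* continues past the
                                                     * bytes peeked above */
    }
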
.. | .. |
---|
2613 | 2816 | return -EOPNOTSUPP; |
---|
2614 | 2817 | } |
---|
2615 | 2818 | EXPORT_SYMBOL(sock_no_shutdown); |
---|
2616 | | - |
---|
2617 | | -int sock_no_setsockopt(struct socket *sock, int level, int optname, |
---|
2618 | | - char __user *optval, unsigned int optlen) |
---|
2619 | | -{ |
---|
2620 | | - return -EOPNOTSUPP; |
---|
2621 | | -} |
---|
2622 | | -EXPORT_SYMBOL(sock_no_setsockopt); |
---|
2623 | | - |
---|
2624 | | -int sock_no_getsockopt(struct socket *sock, int level, int optname, |
---|
2625 | | - char __user *optval, int __user *optlen) |
---|
2626 | | -{ |
---|
2627 | | - return -EOPNOTSUPP; |
---|
2628 | | -} |
---|
2629 | | -EXPORT_SYMBOL(sock_no_getsockopt); |
---|
2630 | 2819 | |
---|
2631 | 2820 | int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len) |
---|
2632 | 2821 | { |
---|
.. | .. |
---|
2732 | 2921 | rcu_read_unlock(); |
---|
2733 | 2922 | } |
---|
2734 | 2923 | |
---|
2735 | | -static void sock_def_readable(struct sock *sk) |
---|
| 2924 | +void sock_def_readable(struct sock *sk) |
---|
2736 | 2925 | { |
---|
2737 | 2926 | struct socket_wq *wq; |
---|
2738 | 2927 | |
---|
2739 | 2928 | rcu_read_lock(); |
---|
2740 | 2929 | wq = rcu_dereference(sk->sk_wq); |
---|
2741 | | - if (skwq_has_sleeper(wq)) |
---|
| 2930 | + |
---|
| 2931 | + if (skwq_has_sleeper(wq)) { |
---|
| 2932 | + int done = 0; |
---|
| 2933 | + |
---|
| 2934 | + trace_android_vh_do_wake_up_sync(&wq->wait, &done); |
---|
| 2935 | + if (done) |
---|
| 2936 | + goto out; |
---|
| 2937 | + |
---|
2742 | 2938 | wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI | |
---|
2743 | 2939 | EPOLLRDNORM | EPOLLRDBAND); |
---|
| 2940 | + } |
---|
| 2941 | + |
---|
| 2942 | +out: |
---|
2744 | 2943 | sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); |
---|
2745 | 2944 | rcu_read_unlock(); |
---|
2746 | 2945 | } |
---|
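
sock_def_readable() is made global here (and gains an Android vendor hook that may claim the wakeup). Protocol receive paths reach it through the sk_data_ready callback; a minimal delivery sketch, ignoring the rmem accounting that real code does via sock_queue_rcv_skb() (the function is hypothetical):

    static void toy_deliver(struct sock *sk, struct sk_buff *skb)
    {
            skb_queue_tail(&sk->sk_receive_queue, skb);
            sk->sk_data_ready(sk);  /* sock_def_readable() unless the
                                     * protocol installed its own hook */
    }
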
.. | .. |
---|
2754 | 2953 | /* Do not wake up a writer until he can make "significant" |
---|
2755 | 2954 | * progress. --DaveM |
---|
2756 | 2955 | */ |
---|
2757 | | - if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { |
---|
| 2956 | + if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= READ_ONCE(sk->sk_sndbuf)) { |
---|
2758 | 2957 | wq = rcu_dereference(sk->sk_wq); |
---|
2759 | 2958 | if (skwq_has_sleeper(wq)) |
---|
2760 | 2959 | wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT | |
---|
.. | .. |
---|
2795 | 2994 | } |
---|
2796 | 2995 | EXPORT_SYMBOL(sk_stop_timer); |
---|
2797 | 2996 | |
---|
2798 | | -void sock_init_data(struct socket *sock, struct sock *sk) |
---|
| 2997 | +void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer) |
---|
| 2998 | +{ |
---|
| 2999 | + if (del_timer_sync(timer)) |
---|
| 3000 | + __sock_put(sk); |
---|
| 3001 | +} |
---|
| 3002 | +EXPORT_SYMBOL(sk_stop_timer_sync); |
---|
| 3003 | + |
---|
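
sk_stop_timer_sync() differs from sk_stop_timer() only in using del_timer_sync(), so it additionally waits for a handler running on another CPU. That makes it the safe variant on teardown paths that are about to free the socket, and the wrong one inside the timer handler itself, where it would deadlock. The usual pairing, sketched with hypothetical wrappers:

    static void toy_arm(struct sock *sk)
    {
            /* sk_reset_timer() takes a socket reference on behalf of
             * the pending timer. */
            sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ);
    }

    static void toy_teardown(struct sock *sk)
    {
            /* Cancels, waits out a concurrent handler, then drops the
             * pending timer's reference via __sock_put(). */
            sk_stop_timer_sync(sk, &sk->sk_timer);
    }
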
| 3004 | +void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid) |
---|
2799 | 3005 | { |
---|
2800 | 3006 | sk_init_common(sk); |
---|
2801 | 3007 | sk->sk_send_head = NULL; |
---|
.. | .. |
---|
2803 | 3009 | timer_setup(&sk->sk_timer, NULL, 0); |
---|
2804 | 3010 | |
---|
2805 | 3011 | sk->sk_allocation = GFP_KERNEL; |
---|
2806 | | - sk->sk_rcvbuf = sysctl_rmem_default; |
---|
2807 | | - sk->sk_sndbuf = sysctl_wmem_default; |
---|
| 3012 | + sk->sk_rcvbuf = READ_ONCE(sysctl_rmem_default); |
---|
| 3013 | + sk->sk_sndbuf = READ_ONCE(sysctl_wmem_default); |
---|
2808 | 3014 | sk->sk_state = TCP_CLOSE; |
---|
2809 | 3015 | sk_set_socket(sk, sock); |
---|
2810 | 3016 | |
---|
.. | .. |
---|
2812 | 3018 | |
---|
2813 | 3019 | if (sock) { |
---|
2814 | 3020 | sk->sk_type = sock->type; |
---|
2815 | | - sk->sk_wq = sock->wq; |
---|
| 3021 | + RCU_INIT_POINTER(sk->sk_wq, &sock->wq); |
---|
2816 | 3022 | sock->sk = sk; |
---|
2817 | | - sk->sk_uid = SOCK_INODE(sock)->i_uid; |
---|
2818 | 3023 | } else { |
---|
2819 | | - sk->sk_wq = NULL; |
---|
2820 | | - sk->sk_uid = make_kuid(sock_net(sk)->user_ns, 0); |
---|
| 3024 | + RCU_INIT_POINTER(sk->sk_wq, NULL); |
---|
2821 | 3025 | } |
---|
| 3026 | + sk->sk_uid = uid; |
---|
2822 | 3027 | |
---|
2823 | 3028 | rwlock_init(&sk->sk_callback_lock); |
---|
2824 | 3029 | if (sk->sk_kern_sock) |
---|
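
sk_wq is now handled as an RCU pointer. RCU_INIT_POINTER() is the cheap initializer: it omits the publication barrier of rcu_assign_pointer(), which is legal because the socket is not yet visible to readers at this point. The three forms side by side (new_wq is hypothetical):

    /* Initialization, before the object is published: */
    RCU_INIT_POINTER(sk->sk_wq, &sock->wq);

    /* A later update that concurrent readers may observe: */
    rcu_assign_pointer(sk->sk_wq, new_wq);

    /* Reader, under rcu_read_lock(): */
    struct socket_wq *wq = rcu_dereference(sk->sk_wq);
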
.. | .. |
---|
2859 | 3064 | |
---|
2860 | 3065 | #ifdef CONFIG_NET_RX_BUSY_POLL |
---|
2861 | 3066 | sk->sk_napi_id = 0; |
---|
2862 | | - sk->sk_ll_usec = sysctl_net_busy_read; |
---|
| 3067 | + sk->sk_ll_usec = READ_ONCE(sysctl_net_busy_read); |
---|
2863 | 3068 | #endif |
---|
2864 | 3069 | |
---|
2865 | | - sk->sk_max_pacing_rate = ~0U; |
---|
2866 | | - sk->sk_pacing_rate = ~0U; |
---|
2867 | | - sk->sk_pacing_shift = 10; |
---|
| 3070 | + sk->sk_max_pacing_rate = ~0UL; |
---|
| 3071 | + sk->sk_pacing_rate = ~0UL; |
---|
| 3072 | + WRITE_ONCE(sk->sk_pacing_shift, 10); |
---|
2868 | 3073 | sk->sk_incoming_cpu = -1; |
---|
2869 | 3074 | |
---|
2870 | 3075 | sk_rx_queue_clear(sk); |
---|
2871 | 3076 | /* |
---|
2872 | 3077 | * Before updating sk_refcnt, we must commit prior changes to memory |
---|
2873 | | - * (Documentation/RCU/rculist_nulls.txt for details) |
---|
| 3078 | + * (Documentation/RCU/rculist_nulls.rst for details) |
---|
2874 | 3079 | */ |
---|
2875 | 3080 | smp_wmb(); |
---|
2876 | 3081 | refcount_set(&sk->sk_refcnt, 1); |
---|
2877 | 3082 | atomic_set(&sk->sk_drops, 0); |
---|
| 3083 | +} |
---|
| 3084 | +EXPORT_SYMBOL(sock_init_data_uid); |
---|
| 3085 | + |
---|
| 3086 | +void sock_init_data(struct socket *sock, struct sock *sk) |
---|
| 3087 | +{ |
---|
| 3088 | + kuid_t uid = sock ? |
---|
| 3089 | + SOCK_INODE(sock)->i_uid : |
---|
| 3090 | + make_kuid(sock_net(sk)->user_ns, 0); |
---|
| 3091 | + |
---|
| 3092 | + sock_init_data_uid(sock, sk, uid); |
---|
2878 | 3093 | } |
---|
2879 | 3094 | EXPORT_SYMBOL(sock_init_data); |
---|
2880 | 3095 | |
---|
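
Splitting out sock_init_data_uid() lets callers whose struct socket is not backed by a real socket inode still attribute the sock to a meaningful user, instead of whatever SOCK_INODE(sock) would yield. A hypothetical driver-side caller (the wrapper and its file argument are illustrative, not part of this patch):

    static void toy_sock_init(struct socket *sock, struct sock *sk,
                              const struct file *file)
    {
            /* Attribute the socket to the opener's filesystem uid. */
            sock_init_data_uid(sock, sk, file->f_cred->fsuid);
    }
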
.. | .. |
---|
2949 | 3164 | } |
---|
2950 | 3165 | EXPORT_SYMBOL(lock_sock_fast); |
---|
2951 | 3166 | |
---|
2952 | | -int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) |
---|
| 3167 | +int sock_gettstamp(struct socket *sock, void __user *userstamp, |
---|
| 3168 | + bool timeval, bool time32) |
---|
2953 | 3169 | { |
---|
2954 | | - struct timeval tv; |
---|
| 3170 | + struct sock *sk = sock->sk; |
---|
| 3171 | + struct timespec64 ts; |
---|
2955 | 3172 | |
---|
2956 | 3173 | sock_enable_timestamp(sk, SOCK_TIMESTAMP); |
---|
2957 | | - tv = ktime_to_timeval(sock_read_timestamp(sk)); |
---|
2958 | | - if (tv.tv_sec == -1) |
---|
2959 | | - return -ENOENT; |
---|
2960 | | - if (tv.tv_sec == 0) { |
---|
2961 | | - ktime_t kt = ktime_get_real(); |
---|
2962 | | - sock_write_timestamp(sk, kt); |
---|
2963 | | - tv = ktime_to_timeval(kt); |
---|
2964 | | - } |
---|
2965 | | - return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0; |
---|
2966 | | -} |
---|
2967 | | -EXPORT_SYMBOL(sock_get_timestamp); |
---|
2968 | | - |
---|
2969 | | -int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp) |
---|
2970 | | -{ |
---|
2971 | | - struct timespec ts; |
---|
2972 | | - |
---|
2973 | | - sock_enable_timestamp(sk, SOCK_TIMESTAMP); |
---|
2974 | | - ts = ktime_to_timespec(sock_read_timestamp(sk)); |
---|
| 3174 | + ts = ktime_to_timespec64(sock_read_timestamp(sk)); |
---|
2975 | 3175 | if (ts.tv_sec == -1) |
---|
2976 | 3176 | return -ENOENT; |
---|
2977 | 3177 | if (ts.tv_sec == 0) { |
---|
2978 | 3178 | ktime_t kt = ktime_get_real(); |
---|
2979 | 3179 | sock_write_timestamp(sk, kt); |
---|
2980 | | - ts = ktime_to_timespec(sk->sk_stamp); |
---|
| 3180 | + ts = ktime_to_timespec64(kt); |
---|
2981 | 3181 | } |
---|
2982 | | - return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0; |
---|
2983 | | -} |
---|
2984 | | -EXPORT_SYMBOL(sock_get_timestampns); |
---|
2985 | 3182 | |
---|
2986 | | -void sock_enable_timestamp(struct sock *sk, int flag) |
---|
| 3183 | + if (timeval) |
---|
| 3184 | + ts.tv_nsec /= 1000; |
---|
| 3185 | + |
---|
| 3186 | +#ifdef CONFIG_COMPAT_32BIT_TIME |
---|
| 3187 | + if (time32) |
---|
| 3188 | + return put_old_timespec32(&ts, userstamp); |
---|
| 3189 | +#endif |
---|
| 3190 | +#ifdef CONFIG_SPARC64 |
---|
| 3191 | + /* beware of padding in sparc64 timeval */ |
---|
| 3192 | + if (timeval && !in_compat_syscall()) { |
---|
| 3193 | + struct __kernel_old_timeval __user tv = { |
---|
| 3194 | + .tv_sec = ts.tv_sec, |
---|
| 3195 | + .tv_usec = ts.tv_nsec, |
---|
| 3196 | + }; |
---|
| 3197 | + if (copy_to_user(userstamp, &tv, sizeof(tv))) |
---|
| 3198 | + return -EFAULT; |
---|
| 3199 | + return 0; |
---|
| 3200 | + } |
---|
| 3201 | +#endif |
---|
| 3202 | + return put_timespec64(&ts, userstamp); |
---|
| 3203 | +} |
---|
| 3204 | +EXPORT_SYMBOL(sock_gettstamp); |
---|
| 3205 | + |
---|
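
sock_gettstamp() folds the old timeval and timespec getters into one helper parameterized on the output format; the SIOCGSTAMP ioctl is one caller, with timeval=true. From userspace:

    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <sys/time.h>
    #include <linux/sockios.h>      /* SIOCGSTAMP */

    static void print_rx_stamp(int fd)
    {
            struct timeval tv;

            /* Timestamp of the last packet received on fd; -ENOENT is
             * surfaced if nothing has been timestamped yet. */
            if (ioctl(fd, SIOCGSTAMP, &tv) == 0)
                    printf("rx at %lld.%06ld\n",
                           (long long)tv.tv_sec, (long)tv.tv_usec);
    }
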
| 3206 | +void sock_enable_timestamp(struct sock *sk, enum sock_flags flag) |
---|
2987 | 3207 | { |
---|
2988 | 3208 | if (!sock_flag(sk, flag)) { |
---|
2989 | 3209 | unsigned long previous_flags = sk->sk_flags; |
---|
.. | .. |
---|
3052 | 3272 | } |
---|
3053 | 3273 | EXPORT_SYMBOL(sock_common_getsockopt); |
---|
3054 | 3274 | |
---|
3055 | | -#ifdef CONFIG_COMPAT |
---|
3056 | | -int compat_sock_common_getsockopt(struct socket *sock, int level, int optname, |
---|
3057 | | - char __user *optval, int __user *optlen) |
---|
3058 | | -{ |
---|
3059 | | - struct sock *sk = sock->sk; |
---|
3060 | | - |
---|
3061 | | - if (sk->sk_prot->compat_getsockopt != NULL) |
---|
3062 | | - return sk->sk_prot->compat_getsockopt(sk, level, optname, |
---|
3063 | | - optval, optlen); |
---|
3064 | | - return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen); |
---|
3065 | | -} |
---|
3066 | | -EXPORT_SYMBOL(compat_sock_common_getsockopt); |
---|
3067 | | -#endif |
---|
3068 | | - |
---|
3069 | 3275 | int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, |
---|
3070 | 3276 | int flags) |
---|
3071 | 3277 | { |
---|
.. | .. |
---|
3085 | 3291 | * Set socket options on an inet socket. |
---|
3086 | 3292 | */ |
---|
3087 | 3293 | int sock_common_setsockopt(struct socket *sock, int level, int optname, |
---|
3088 | | - char __user *optval, unsigned int optlen) |
---|
| 3294 | + sockptr_t optval, unsigned int optlen) |
---|
3089 | 3295 | { |
---|
3090 | 3296 | struct sock *sk = sock->sk; |
---|
3091 | 3297 | |
---|
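
The sockptr_t signature lets one option handler serve both user buffers and in-kernel callers, replacing the old pattern of kernel_setsockopt() with set_fs() games. A hypothetical handler:

    #include <linux/sockptr.h>

    static int toy_setsockopt(struct sock *sk, sockptr_t optval,
                              unsigned int optlen)
    {
            int val;

            if (optlen < sizeof(val))
                    return -EINVAL;
            /* Copies correctly from USER_SOCKPTR() and KERNEL_SOCKPTR()
             * sources alike. */
            if (copy_from_sockptr(&val, optval, sizeof(val)))
                    return -EFAULT;
            /* ... apply val ... */
            return 0;
    }

Userspace entry points wrap the buffer as USER_SOCKPTR(optval); kernel callers pass KERNEL_SOCKPTR(&val).
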
.. | .. |
---|
3093 | 3299 | } |
---|
3094 | 3300 | EXPORT_SYMBOL(sock_common_setsockopt); |
---|
3095 | 3301 | |
---|
3096 | | -#ifdef CONFIG_COMPAT |
---|
3097 | | -int compat_sock_common_setsockopt(struct socket *sock, int level, int optname, |
---|
3098 | | - char __user *optval, unsigned int optlen) |
---|
3099 | | -{ |
---|
3100 | | - struct sock *sk = sock->sk; |
---|
3101 | | - |
---|
3102 | | - if (sk->sk_prot->compat_setsockopt != NULL) |
---|
3103 | | - return sk->sk_prot->compat_setsockopt(sk, level, optname, |
---|
3104 | | - optval, optlen); |
---|
3105 | | - return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen); |
---|
3106 | | -} |
---|
3107 | | -EXPORT_SYMBOL(compat_sock_common_setsockopt); |
---|
3108 | | -#endif |
---|
3109 | | - |
---|
3110 | 3302 | void sk_common_release(struct sock *sk) |
---|
3111 | 3303 | { |
---|
3112 | 3304 | if (sk->sk_prot->destroy) |
---|
3113 | 3305 | sk->sk_prot->destroy(sk); |
---|
3114 | 3306 | |
---|
3115 | 3307 | /* |
---|
3116 | | - * Observation: when sock_common_release is called, processes have |
---|
| 3308 | + * Observation: when sk_common_release is called, processes have |
---|
3117 | 3309 | * no access to socket. But net still has. |
---|
3118 | 3310 | * Step one, detach it from networking: |
---|
3119 | 3311 | * |
---|
.. | .. |
---|
3149 | 3341 | memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS); |
---|
3150 | 3342 | |
---|
3151 | 3343 | mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk); |
---|
3152 | | - mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf; |
---|
| 3344 | + mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf); |
---|
3153 | 3345 | mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk); |
---|
3154 | | - mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf; |
---|
| 3346 | + mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf); |
---|
3155 | 3347 | mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc; |
---|
3156 | | - mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued; |
---|
| 3348 | + mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued); |
---|
3157 | 3349 | mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc); |
---|
3158 | | - mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len; |
---|
| 3350 | + mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len); |
---|
3159 | 3351 | mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops); |
---|
3160 | 3352 | } |
---|
3161 | 3353 | |
---|
.. | .. |
---|
3240 | 3432 | |
---|
3241 | 3433 | core_initcall(net_inuse_init); |
---|
3242 | 3434 | |
---|
3243 | | -static void assign_proto_idx(struct proto *prot) |
---|
| 3435 | +static int assign_proto_idx(struct proto *prot) |
---|
3244 | 3436 | { |
---|
3245 | 3437 | prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR); |
---|
3246 | 3438 | |
---|
3247 | 3439 | if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) { |
---|
3248 | 3440 | pr_err("PROTO_INUSE_NR exhausted\n"); |
---|
3249 | | - return; |
---|
| 3441 | + return -ENOSPC; |
---|
3250 | 3442 | } |
---|
3251 | 3443 | |
---|
3252 | 3444 | set_bit(prot->inuse_idx, proto_inuse_idx); |
---|
| 3445 | + return 0; |
---|
3253 | 3446 | } |
---|
3254 | 3447 | |
---|
3255 | 3448 | static void release_proto_idx(struct proto *prot) |
---|
.. | .. |
---|
3258 | 3451 | clear_bit(prot->inuse_idx, proto_inuse_idx); |
---|
3259 | 3452 | } |
---|
3260 | 3453 | #else |
---|
3261 | | -static inline void assign_proto_idx(struct proto *prot) |
---|
| 3454 | +static inline int assign_proto_idx(struct proto *prot) |
---|
3262 | 3455 | { |
---|
| 3456 | + return 0; |
---|
3263 | 3457 | } |
---|
3264 | 3458 | |
---|
3265 | 3459 | static inline void release_proto_idx(struct proto *prot) |
---|
.. | .. |
---|
3270 | 3464 | { |
---|
3271 | 3465 | } |
---|
3272 | 3466 | #endif |
---|
| 3467 | + |
---|
| 3468 | +static void tw_prot_cleanup(struct timewait_sock_ops *twsk_prot) |
---|
| 3469 | +{ |
---|
| 3470 | + if (!twsk_prot) |
---|
| 3471 | + return; |
---|
| 3472 | + kfree(twsk_prot->twsk_slab_name); |
---|
| 3473 | + twsk_prot->twsk_slab_name = NULL; |
---|
| 3474 | + kmem_cache_destroy(twsk_prot->twsk_slab); |
---|
| 3475 | + twsk_prot->twsk_slab = NULL; |
---|
| 3476 | +} |
---|
3273 | 3477 | |
---|
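
tw_prot_cleanup() centralizes the free-and-NULL pattern so it can run from both the proto_register() error path below and proto_unregister() without double-freeing; kfree() and kmem_cache_destroy() both accept NULL. The same shape on a hypothetical cache:

    struct toy_cache {              /* hypothetical */
            char *name;
            struct kmem_cache *slab;
    };

    static void toy_cache_cleanup(struct toy_cache *c)
    {
            kfree(c->name);
            c->name = NULL;         /* poison: a second call is a no-op */
            kmem_cache_destroy(c->slab);
            c->slab = NULL;
    }
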
3274 | 3478 | static void req_prot_cleanup(struct request_sock_ops *rsk_prot) |
---|
3275 | 3479 | { |
---|
.. | .. |
---|
3308 | 3512 | |
---|
3309 | 3513 | int proto_register(struct proto *prot, int alloc_slab) |
---|
3310 | 3514 | { |
---|
| 3515 | + int ret = -ENOBUFS; |
---|
| 3516 | + |
---|
3311 | 3517 | if (alloc_slab) { |
---|
3312 | 3518 | prot->slab = kmem_cache_create_usercopy(prot->name, |
---|
3313 | 3519 | prot->obj_size, 0, |
---|
.. | .. |
---|
3339 | 3545 | prot->slab_flags, |
---|
3340 | 3546 | NULL); |
---|
3341 | 3547 | if (prot->twsk_prot->twsk_slab == NULL) |
---|
3342 | | - goto out_free_timewait_sock_slab_name; |
---|
| 3548 | + goto out_free_timewait_sock_slab; |
---|
3343 | 3549 | } |
---|
3344 | 3550 | } |
---|
3345 | 3551 | |
---|
3346 | 3552 | mutex_lock(&proto_list_mutex); |
---|
| 3553 | + ret = assign_proto_idx(prot); |
---|
| 3554 | + if (ret) { |
---|
| 3555 | + mutex_unlock(&proto_list_mutex); |
---|
| 3556 | + goto out_free_timewait_sock_slab; |
---|
| 3557 | + } |
---|
3347 | 3558 | list_add(&prot->node, &proto_list); |
---|
3348 | | - assign_proto_idx(prot); |
---|
3349 | 3559 | mutex_unlock(&proto_list_mutex); |
---|
3350 | | - return 0; |
---|
| 3560 | + return ret; |
---|
3351 | 3561 | |
---|
3352 | | -out_free_timewait_sock_slab_name: |
---|
3353 | | - kfree(prot->twsk_prot->twsk_slab_name); |
---|
| 3562 | +out_free_timewait_sock_slab: |
---|
| 3563 | + if (alloc_slab && prot->twsk_prot) |
---|
| 3564 | + tw_prot_cleanup(prot->twsk_prot); |
---|
3354 | 3565 | out_free_request_sock_slab: |
---|
3355 | | - req_prot_cleanup(prot->rsk_prot); |
---|
| 3566 | + if (alloc_slab) { |
---|
| 3567 | + req_prot_cleanup(prot->rsk_prot); |
---|
3356 | 3568 | |
---|
3357 | | - kmem_cache_destroy(prot->slab); |
---|
3358 | | - prot->slab = NULL; |
---|
| 3569 | + kmem_cache_destroy(prot->slab); |
---|
| 3570 | + prot->slab = NULL; |
---|
| 3571 | + } |
---|
3359 | 3572 | out: |
---|
3360 | | - return -ENOBUFS; |
---|
| 3573 | + return ret; |
---|
3361 | 3574 | } |
---|
3362 | 3575 | EXPORT_SYMBOL(proto_register); |
---|
3363 | 3576 | |
---|
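
With assign_proto_idx() failures now propagated, module init code has one more reason to check the return value. A minimal registration sketch (the protocol is hypothetical):

    #include <linux/module.h>
    #include <net/sock.h>

    static struct proto toy_proto = {
            .name     = "TOY",
            .owner    = THIS_MODULE,
            .obj_size = sizeof(struct sock),
    };

    static int __init toy_init(void)
    {
            int err = proto_register(&toy_proto, 1);  /* 1: allocate slab */

            if (err)        /* -ENOBUFS, or -ENOSPC on idx exhaustion */
                    return err;
            /* ... register proto_ops / socket family ... */
            return 0;
    }

    static void __exit toy_exit(void)
    {
            proto_unregister(&toy_proto);
    }

    module_init(toy_init);
    module_exit(toy_exit);
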
.. | .. |
---|
3372 | 3585 | prot->slab = NULL; |
---|
3373 | 3586 | |
---|
3374 | 3587 | req_prot_cleanup(prot->rsk_prot); |
---|
3375 | | - |
---|
3376 | | - if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) { |
---|
3377 | | - kmem_cache_destroy(prot->twsk_prot->twsk_slab); |
---|
3378 | | - kfree(prot->twsk_prot->twsk_slab_name); |
---|
3379 | | - prot->twsk_prot->twsk_slab = NULL; |
---|
3380 | | - } |
---|
| 3588 | + tw_prot_cleanup(prot->twsk_prot); |
---|
3381 | 3589 | } |
---|
3382 | 3590 | EXPORT_SYMBOL(proto_unregister); |
---|
3383 | 3591 | |
---|
.. | .. |
---|
3394 | 3602 | #ifdef CONFIG_INET |
---|
3395 | 3603 | if (family == AF_INET && |
---|
3396 | 3604 | protocol != IPPROTO_RAW && |
---|
| 3605 | + protocol < MAX_INET_PROTOS && |
---|
3397 | 3606 | !rcu_access_pointer(inet_protos[protocol])) |
---|
3398 | 3607 | return -ENOENT; |
---|
3399 | 3608 | #endif |
---|
.. | .. |
---|
3431 | 3640 | return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L; |
---|
3432 | 3641 | } |
---|
3433 | 3642 | |
---|
3434 | | -static char *sock_prot_memory_pressure(struct proto *proto) |
---|
| 3643 | +static const char *sock_prot_memory_pressure(struct proto *proto) |
---|
3435 | 3644 | { |
---|
3436 | 3645 | return proto->memory_pressure != NULL ? |
---|
3437 | 3646 | proto_memory_pressure(proto) ? "yes" : "no" : "NI"; |
---|
.. | .. |
---|
3535 | 3744 | } |
---|
3536 | 3745 | EXPORT_SYMBOL(sk_busy_loop_end); |
---|
3537 | 3746 | #endif /* CONFIG_NET_RX_BUSY_POLL */ |
---|
| 3747 | + |
---|
| 3748 | +int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len) |
---|
| 3749 | +{ |
---|
| 3750 | + if (!sk->sk_prot->bind_add) |
---|
| 3751 | + return -EOPNOTSUPP; |
---|
| 3752 | + return sk->sk_prot->bind_add(sk, addr, addr_len); |
---|
| 3753 | +} |
---|
| 3754 | +EXPORT_SYMBOL(sock_bind_add); |
---|
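
sock_bind_add() gives kernel-side users a uniform way to add an address to an already-bound socket without poking at protocol internals; only protocols that set ->bind_add() support it (in mainline, SCTP uses it for multi-homing). A hypothetical wiring of both sides:

    /* Protocol side: advertise the hook. */
    static int toy_bind_add(struct sock *sk, struct sockaddr *addr,
                            int addr_len)
    {
            /* validate addr, then bind it alongside existing addresses */
            return 0;
    }

    static struct proto toy_proto = {
            .name     = "TOY",
            .obj_size = sizeof(struct sock),
            .bind_add = toy_bind_add,
    };

    /* Caller side, e.g. a kernel service on a multi-homed socket: */
    static int toy_add_addr(struct socket *sock, struct sockaddr *sa, int len)
    {
            return sock_bind_add(sock->sk, sa, len); /* -EOPNOTSUPP
                                                      * without the hook */
    }
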