```diff
@@ -1 +1 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system. INET is implemented using the BSD Socket
@@ -5 +6 @@
  *
  * Generic socket support routines. Memory allocators, socket lock/release
  * handler for protocols to use and generic option handler.
- *
  *
  * Authors:	Ross Biro
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -81 +81 @@
  *		Arnaldo C. Melo :	cleanups, use skb_queue_purge
  *
  * To Fix:
- *
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -119 +113 @@
 #include <linux/static_key.h>
 #include <linux/memcontrol.h>
 #include <linux/prefetch.h>
+#include <linux/compat.h>
 
 #include <linux/uaccess.h>
 
@@ -137 +132 @@
 
 #include <linux/filter.h>
 #include <net/sock_reuseport.h>
+#include <net/bpf_sk_storage.h>
 
 #include <trace/events/sock.h>
+#include <trace/hooks/sched.h>
 
 #include <net/tcp.h>
 #include <net/busy_poll.h>
@@ -335 +332 @@
 }
 EXPORT_SYMBOL(__sk_backlog_rcv);
 
-static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
+static int sock_get_timeout(long timeo, void *optval, bool old_timeval)
 {
-	struct timeval tv;
+	struct __kernel_sock_timeval tv;
 
-	if (optlen < sizeof(tv))
-		return -EINVAL;
-	if (copy_from_user(&tv, optval, sizeof(tv)))
-		return -EFAULT;
+	if (timeo == MAX_SCHEDULE_TIMEOUT) {
+		tv.tv_sec = 0;
+		tv.tv_usec = 0;
+	} else {
+		tv.tv_sec = timeo / HZ;
+		tv.tv_usec = ((timeo % HZ) * USEC_PER_SEC) / HZ;
+	}
+
+	if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
+		struct old_timeval32 tv32 = { tv.tv_sec, tv.tv_usec };
+		*(struct old_timeval32 *)optval = tv32;
+		return sizeof(tv32);
+	}
+
+	if (old_timeval) {
+		struct __kernel_old_timeval old_tv;
+		old_tv.tv_sec = tv.tv_sec;
+		old_tv.tv_usec = tv.tv_usec;
+		*(struct __kernel_old_timeval *)optval = old_tv;
+		return sizeof(old_tv);
+	}
+
+	*(struct __kernel_sock_timeval *)optval = tv;
+	return sizeof(tv);
+}
+
+static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
+			    bool old_timeval)
+{
+	struct __kernel_sock_timeval tv;
+
+	if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
+		struct old_timeval32 tv32;
+
+		if (optlen < sizeof(tv32))
+			return -EINVAL;
+
+		if (copy_from_sockptr(&tv32, optval, sizeof(tv32)))
+			return -EFAULT;
+		tv.tv_sec = tv32.tv_sec;
+		tv.tv_usec = tv32.tv_usec;
+	} else if (old_timeval) {
+		struct __kernel_old_timeval old_tv;
+
+		if (optlen < sizeof(old_tv))
+			return -EINVAL;
+		if (copy_from_sockptr(&old_tv, optval, sizeof(old_tv)))
+			return -EFAULT;
+		tv.tv_sec = old_tv.tv_sec;
+		tv.tv_usec = old_tv.tv_usec;
+	} else {
+		if (optlen < sizeof(tv))
+			return -EINVAL;
+		if (copy_from_sockptr(&tv, optval, sizeof(tv)))
+			return -EFAULT;
+	}
 	if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
 		return -EDOM;
 
@@ -360 +409 @@
 	*timeo_p = MAX_SCHEDULE_TIMEOUT;
 	if (tv.tv_sec == 0 && tv.tv_usec == 0)
 		return 0;
-	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
-		*timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP(tv.tv_usec, USEC_PER_SEC / HZ);
+	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1))
+		*timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP((unsigned long)tv.tv_usec, USEC_PER_SEC / HZ);
 	return 0;
-}
-
-static void sock_warn_obsolete_bsdism(const char *name)
-{
-	static int warned;
-	static char warncomm[TASK_COMM_LEN];
-	if (strcmp(warncomm, current->comm) && warned < 5) {
-		strcpy(warncomm, current->comm);
-		pr_warn("process `%s' is using obsolete %s SO_BSDCOMPAT\n",
-			warncomm, name);
-		warned++;
-	}
 }
 
 static bool sock_needs_netstamp(const struct sock *sk)
```
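The split above separates formatting a timeout for getsockopt (`sock_get_timeout`) from parsing one for setsockopt (`sock_set_timeout`), with the `old_timeval` flag selecting between the legacy `timeval` layouts and the y2038-safe `__kernel_sock_timeval`. A minimal userspace sketch of the common path (plain `SO_RCVTIMEO`, which maps to the `_OLD` variant on typical 64-bit ABIs; this example is illustrative, not part of the patch):

```c
#include <sys/socket.h>
#include <sys/time.h>

/* Set a 2.5 s receive timeout. Note tv_usec outside [0, 1000000)
 * makes the kernel return -EDOM, per sock_set_timeout() above.
 */
int set_rcv_timeout(int fd)
{
	struct timeval tv = { .tv_sec = 2, .tv_usec = 500000 };

	return setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
}
```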
```diff
@@ -472 +509 @@
 
 		rc = sk_backlog_rcv(sk, skb);
 
-		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
-	} else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
+		mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
+	} else if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) {
 		bh_unlock_sock(sk);
 		atomic_inc(&sk->sk_drops);
 		goto discard_and_relse;
@@ -520 +557 @@
 }
 EXPORT_SYMBOL(sk_dst_check);
 
-static int sock_setbindtodevice(struct sock *sk, char __user *optval,
-				int optlen)
+static int sock_bindtoindex_locked(struct sock *sk, int ifindex)
+{
+	int ret = -ENOPROTOOPT;
+#ifdef CONFIG_NETDEVICES
+	struct net *net = sock_net(sk);
+
+	/* Sorry... */
+	ret = -EPERM;
+	if (sk->sk_bound_dev_if && !ns_capable(net->user_ns, CAP_NET_RAW))
+		goto out;
+
+	ret = -EINVAL;
+	if (ifindex < 0)
+		goto out;
+
+	sk->sk_bound_dev_if = ifindex;
+	if (sk->sk_prot->rehash)
+		sk->sk_prot->rehash(sk);
+	sk_dst_reset(sk);
+
+	ret = 0;
+
+out:
+#endif
+
+	return ret;
+}
+
+int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk)
+{
+	int ret;
+
+	if (lock_sk)
+		lock_sock(sk);
+	ret = sock_bindtoindex_locked(sk, ifindex);
+	if (lock_sk)
+		release_sock(sk);
+
+	return ret;
+}
+EXPORT_SYMBOL(sock_bindtoindex);
+
+static int sock_setbindtodevice(struct sock *sk, sockptr_t optval, int optlen)
 {
 	int ret = -ENOPROTOOPT;
 #ifdef CONFIG_NETDEVICES
 	struct net *net = sock_net(sk);
 	char devname[IFNAMSIZ];
 	int index;
-
-	/* Sorry... */
-	ret = -EPERM;
-	if (!ns_capable(net->user_ns, CAP_NET_RAW))
-		goto out;
 
 	ret = -EINVAL;
 	if (optlen < 0)
@@ -548 +621 @@
 	memset(devname, 0, sizeof(devname));
 
 	ret = -EFAULT;
-	if (copy_from_user(devname, optval, optlen))
+	if (copy_from_sockptr(devname, optval, optlen))
 		goto out;
 
 	index = 0;
@@ -565 +638 @@
 		goto out;
 	}
 
-	lock_sock(sk);
-	sk->sk_bound_dev_if = index;
-	sk_dst_reset(sk);
-	release_sock(sk);
-
-	ret = 0;
-
+	return sock_bindtoindex(sk, index, true);
 out:
 #endif
 
```
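`sock_bindtoindex` is exported so in-kernel socket users can bind to an interface by index without going through the name-based `SO_BINDTODEVICE` path. A hedged sketch of an in-kernel caller (the function and `my_ifindex` are illustrative, not from this patch):

```c
/* With lock_sk == true the helper takes the socket lock itself,
 * mirroring what the old sock_setbindtodevice() open-coded.
 */
static int example_bind_tunnel_sock(struct socket *sock, int my_ifindex)
{
	return sock_bindtoindex(sock->sk, my_ifindex, true);
}
```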
```diff
@@ -618 +685 @@
 	return ret;
 }
 
-static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
-{
-	if (valbool)
-		sock_set_flag(sk, bit);
-	else
-		sock_reset_flag(sk, bit);
-}
-
 bool sk_mc_loop(struct sock *sk)
 {
 	if (dev_recursion_level())
@@ -645 +704 @@
 }
 EXPORT_SYMBOL(sk_mc_loop);
 
+void sock_set_reuseaddr(struct sock *sk)
+{
+	lock_sock(sk);
+	sk->sk_reuse = SK_CAN_REUSE;
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_reuseaddr);
+
+void sock_set_reuseport(struct sock *sk)
+{
+	lock_sock(sk);
+	sk->sk_reuseport = true;
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_reuseport);
+
+void sock_no_linger(struct sock *sk)
+{
+	lock_sock(sk);
+	sk->sk_lingertime = 0;
+	sock_set_flag(sk, SOCK_LINGER);
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_no_linger);
+
+void sock_set_priority(struct sock *sk, u32 priority)
+{
+	lock_sock(sk);
+	sk->sk_priority = priority;
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_priority);
+
+void sock_set_sndtimeo(struct sock *sk, s64 secs)
+{
+	lock_sock(sk);
+	if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1)
+		sk->sk_sndtimeo = secs * HZ;
+	else
+		sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_sndtimeo);
+
+static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns)
+{
+	if (val) {
+		sock_valbool_flag(sk, SOCK_TSTAMP_NEW, new);
+		sock_valbool_flag(sk, SOCK_RCVTSTAMPNS, ns);
+		sock_set_flag(sk, SOCK_RCVTSTAMP);
+		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
+	} else {
+		sock_reset_flag(sk, SOCK_RCVTSTAMP);
+		sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
+	}
+}
+
+void sock_enable_timestamps(struct sock *sk)
+{
+	lock_sock(sk);
+	__sock_set_timestamps(sk, true, false, true);
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_enable_timestamps);
+
+void sock_set_keepalive(struct sock *sk)
+{
+	lock_sock(sk);
+	if (sk->sk_prot->keepalive)
+		sk->sk_prot->keepalive(sk, true);
+	sock_valbool_flag(sk, SOCK_KEEPOPEN, true);
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_keepalive);
+
+static void __sock_set_rcvbuf(struct sock *sk, int val)
+{
+	/* Ensure val * 2 fits into an int, to prevent max_t() from treating it
+	 * as a negative value.
+	 */
+	val = min_t(int, val, INT_MAX / 2);
+	sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+
+	/* We double it on the way in to account for "struct sk_buff" etc.
+	 * overhead. Applications assume that the SO_RCVBUF setting they make
+	 * will allow that much actual data to be received on that socket.
+	 *
+	 * Applications are unaware that "struct sk_buff" and other overheads
+	 * allocate from the receive buffer during socket buffer allocation.
+	 *
+	 * And after considering the possible alternatives, returning the value
+	 * we actually used in getsockopt is the most desirable behavior.
+	 */
+	WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * 2, SOCK_MIN_RCVBUF));
+}
+
+void sock_set_rcvbuf(struct sock *sk, int val)
+{
+	lock_sock(sk);
+	__sock_set_rcvbuf(sk, val);
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_rcvbuf);
+
+static void __sock_set_mark(struct sock *sk, u32 val)
+{
+	if (val != sk->sk_mark) {
+		sk->sk_mark = val;
+		sk_dst_reset(sk);
+	}
+}
+
+void sock_set_mark(struct sock *sk, u32 val)
+{
+	lock_sock(sk);
+	__sock_set_mark(sk, val);
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_mark);
+
```
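These exported setters give in-kernel socket users typed, lock-safe entry points for common options. A hedged sketch of how a kernel user that previously issued `kernel_setsockopt()` calls might use them (the function and the chosen values are illustrative):

```c
/* Tune a kernel-owned socket with the new helpers; each one takes
 * and releases the socket lock internally.
 */
static void example_tune_kernel_socket(struct sock *sk)
{
	sock_set_reuseaddr(sk);		/* SO_REUSEADDR = 1 */
	sock_no_linger(sk);		/* SO_LINGER with l_linger = 0 */
	sock_set_priority(sk, 6);	/* SO_PRIORITY */
	sock_set_sndtimeo(sk, 5);	/* 5 second send timeout */
	sock_set_keepalive(sk);		/* SO_KEEPALIVE = 1 */
}
```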
```diff
@@ -648 +827 @@
 /*
  *	This is meant for all protocols to use and covers goings on
  *	at the socket level. Everything here is generic.
  */
 
 int sock_setsockopt(struct socket *sock, int level, int optname,
-		    char __user *optval, unsigned int optlen)
+		    sockptr_t optval, unsigned int optlen)
 {
 	struct sock_txtime sk_txtime;
 	struct sock *sk = sock->sk;
@@ -670 +849 @@
 	if (optlen < sizeof(int))
 		return -EINVAL;
 
-	if (get_user(val, (int __user *)optval))
+	if (copy_from_sockptr(&val, optval, sizeof(val)))
 		return -EFAULT;
 
 	valbool = val ? 1 : 0;
@@ -709 +888 @@
 		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
 		 * are treated in BSD as hints
 		 */
-		val = min_t(u32, val, sysctl_wmem_max);
+		val = min_t(u32, val, READ_ONCE(sysctl_wmem_max));
 set_sndbuf:
+		/* Ensure val * 2 fits into an int, to prevent max_t()
+		 * from treating it as a negative value.
+		 */
+		val = min_t(int, val, INT_MAX / 2);
 		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
-		sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
+		WRITE_ONCE(sk->sk_sndbuf,
+			   max_t(int, val * 2, SOCK_MIN_SNDBUF));
 		/* Wake up sending tasks if we upped the value. */
 		sk->sk_write_space(sk);
 		break;
@@ -722 +906 @@
 			ret = -EPERM;
 			break;
 		}
+
+		/* No negative values (to prevent underflow, as val will be
+		 * multiplied by 2).
+		 */
+		if (val < 0)
+			val = 0;
 		goto set_sndbuf;
 
 	case SO_RCVBUF:
@@ -730 +920 @@
 		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
 		 * are treated in BSD as hints
 		 */
-		val = min_t(u32, val, sysctl_rmem_max);
-set_rcvbuf:
-		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
-		/*
-		 * We double it on the way in to account for
-		 * "struct sk_buff" etc. overhead. Applications
-		 * assume that the SO_RCVBUF setting they make will
-		 * allow that much actual data to be received on that
-		 * socket.
-		 *
-		 * Applications are unaware that "struct sk_buff" and
-		 * other overheads allocate from the receive buffer
-		 * during socket buffer allocation.
-		 *
-		 * And after considering the possible alternatives,
-		 * returning the value we actually used in getsockopt
-		 * is the most desirable behavior.
-		 */
-		sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
+		__sock_set_rcvbuf(sk, min_t(u32, val, READ_ONCE(sysctl_rmem_max)));
 		break;
 
 	case SO_RCVBUFFORCE:
@@ -756 +928 @@
 			ret = -EPERM;
 			break;
 		}
-		goto set_rcvbuf;
+
+		/* No negative values (to prevent underflow, as val will be
+		 * multiplied by 2).
+		 */
+		__sock_set_rcvbuf(sk, max(val, 0));
+		break;
 
 	case SO_KEEPALIVE:
 		if (sk->sk_prot->keepalive)
```
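The doubling documented in `__sock_set_rcvbuf` is visible from userspace: the value read back is twice the requested one, clamped between `SOCK_MIN_RCVBUF` and (for unprivileged setters) `sysctl_rmem_max`. A hedged userspace sketch:

```c
#include <stdio.h>
#include <sys/socket.h>

/* Request 64 KiB; the effective value reported by getsockopt()
 * is roughly double (~131072), per the comment in the code above.
 */
void show_rcvbuf_doubling(int fd)
{
	int req = 65536, eff = 0;
	socklen_t len = sizeof(eff);

	setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &req, sizeof(req));
	getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &eff, &len);
	printf("requested %d, effective %d\n", req, eff);
}
```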
```diff
@@ -785 +962 @@
 			ret = -EINVAL;	/* 1003.1g */
 			break;
 		}
-		if (copy_from_user(&ling, optval, sizeof(ling))) {
+		if (copy_from_sockptr(&ling, optval, sizeof(ling))) {
 			ret = -EFAULT;
 			break;
 		}
@@ -803 +980 @@
 		break;
 
 	case SO_BSDCOMPAT:
-		sock_warn_obsolete_bsdism("setsockopt");
 		break;
 
 	case SO_PASSCRED:
@@ -813 +989 @@
 			clear_bit(SOCK_PASSCRED, &sock->flags);
 		break;
 
-	case SO_TIMESTAMP:
-	case SO_TIMESTAMPNS:
-		if (valbool)  {
-			if (optname == SO_TIMESTAMP)
-				sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
-			else
-				sock_set_flag(sk, SOCK_RCVTSTAMPNS);
-			sock_set_flag(sk, SOCK_RCVTSTAMP);
-			sock_enable_timestamp(sk, SOCK_TIMESTAMP);
-		} else {
-			sock_reset_flag(sk, SOCK_RCVTSTAMP);
-			sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
-		}
+	case SO_TIMESTAMP_OLD:
+		__sock_set_timestamps(sk, valbool, false, false);
 		break;
-
-	case SO_TIMESTAMPING:
+	case SO_TIMESTAMP_NEW:
+		__sock_set_timestamps(sk, valbool, true, false);
+		break;
+	case SO_TIMESTAMPNS_OLD:
+		__sock_set_timestamps(sk, valbool, false, true);
+		break;
+	case SO_TIMESTAMPNS_NEW:
+		__sock_set_timestamps(sk, valbool, true, true);
+		break;
+	case SO_TIMESTAMPING_NEW:
+	case SO_TIMESTAMPING_OLD:
 		if (val & ~SOF_TIMESTAMPING_MASK) {
 			ret = -EINVAL;
 			break;
@@ -856 +1030 @@
 		}
 
 		sk->sk_tsflags = val;
+		sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
+
 		if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
 			sock_enable_timestamp(sk,
 					      SOCK_TIMESTAMPING_RX_SOFTWARE);
```
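The `_OLD`/`_NEW` option pairs exist so 32-bit userspace can opt into 64-bit (y2038-safe) timestamp layouts; `SOCK_TSTAMP_NEW` records which layout the receiver expects. A hedged userspace sketch using the classic option (plain `SO_TIMESTAMPNS`, which resolves to the `_OLD` variant on most 64-bit ABIs; header availability of `SCM_TIMESTAMPNS` is assumed from `<sys/socket.h>` on Linux):

```c
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <time.h>

/* Enable nanosecond receive timestamps and print one from the
 * SCM_TIMESTAMPNS control message attached to a received datagram.
 */
void read_one_timestamped(int fd)
{
	int on = 1;
	char data[2048], ctrl[CMSG_SPACE(sizeof(struct timespec))];
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = ctrl, .msg_controllen = sizeof(ctrl),
	};
	struct cmsghdr *cm;

	setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS, &on, sizeof(on));
	if (recvmsg(fd, &msg, 0) < 0)
		return;
	for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm))
		if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_TIMESTAMPNS) {
			struct timespec ts;

			memcpy(&ts, CMSG_DATA(cm), sizeof(ts));
			printf("rx at %lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
		}
}
```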
```diff
@@ -870 +1046 @@
 		if (sock->ops->set_rcvlowat)
 			ret = sock->ops->set_rcvlowat(sk, val);
 		else
-			sk->sk_rcvlowat = val ? : 1;
+			WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
 		break;
 
-	case SO_RCVTIMEO:
-		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
+	case SO_RCVTIMEO_OLD:
+	case SO_RCVTIMEO_NEW:
+		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval,
+				       optlen, optname == SO_RCVTIMEO_OLD);
 		break;
 
-	case SO_SNDTIMEO:
-		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
+	case SO_SNDTIMEO_OLD:
+	case SO_SNDTIMEO_NEW:
+		ret = sock_set_timeout(&sk->sk_sndtimeo, optval,
+				       optlen, optname == SO_SNDTIMEO_OLD);
 		break;
 
-	case SO_ATTACH_FILTER:
-		ret = -EINVAL;
-		if (optlen == sizeof(struct sock_fprog)) {
-			struct sock_fprog fprog;
+	case SO_ATTACH_FILTER: {
+		struct sock_fprog fprog;
 
-			ret = -EFAULT;
-			if (copy_from_user(&fprog, optval, sizeof(fprog)))
-				break;
-
+		ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
+		if (!ret)
 			ret = sk_attach_filter(&fprog, sk);
-		}
 		break;
-
+	}
 	case SO_ATTACH_BPF:
 		ret = -EINVAL;
 		if (optlen == sizeof(u32)) {
 			u32 ufd;
 
 			ret = -EFAULT;
-			if (copy_from_user(&ufd, optval, sizeof(ufd)))
+			if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
 				break;
 
 			ret = sk_attach_bpf(ufd, sk);
 		}
 		break;
 
-	case SO_ATTACH_REUSEPORT_CBPF:
-		ret = -EINVAL;
-		if (optlen == sizeof(struct sock_fprog)) {
-			struct sock_fprog fprog;
+	case SO_ATTACH_REUSEPORT_CBPF: {
+		struct sock_fprog fprog;
 
-			ret = -EFAULT;
-			if (copy_from_user(&fprog, optval, sizeof(fprog)))
-				break;
-
+		ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
+		if (!ret)
 			ret = sk_reuseport_attach_filter(&fprog, sk);
-		}
 		break;
-
+	}
 	case SO_ATTACH_REUSEPORT_EBPF:
 		ret = -EINVAL;
 		if (optlen == sizeof(u32)) {
 			u32 ufd;
 
 			ret = -EFAULT;
-			if (copy_from_user(&ufd, optval, sizeof(ufd)))
+			if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
 				break;
 
 			ret = sk_reuseport_attach_bpf(ufd, sk);
 		}
+		break;
+
+	case SO_DETACH_REUSEPORT_BPF:
+		ret = reuseport_detach_prog(sk);
 		break;
 
 	case SO_DETACH_FILTER:
```
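`copy_bpf_fprog_from_user` centralizes the `sock_fprog` copy (including the compat layout) for both classic-BPF attach points. A hedged userspace sketch of attaching a reuseport steering program, a common use of `SO_ATTACH_REUSEPORT_CBPF` (the example program and function name are illustrative):

```c
#include <linux/filter.h>
#include <sys/socket.h>

/* Steer each incoming packet to the reuseport-group socket whose
 * index equals the receiving CPU; indices past the end of the group
 * fall back to the normal selection.
 */
static int attach_cpu_steering(int fd)
{
	struct sock_filter prog[] = {
		BPF_STMT(BPF_LD | BPF_W | BPF_ABS, SKF_AD_OFF + SKF_AD_CPU),
		BPF_STMT(BPF_RET | BPF_A, 0),
	};
	struct sock_fprog fprog = { .len = 2, .filter = prog };

	return setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF,
			  &fprog, sizeof(fprog));
}
```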
```diff
@@ -951 +1125 @@
 		clear_bit(SOCK_PASSSEC, &sock->flags);
 		break;
 	case SO_MARK:
-		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
 			ret = -EPERM;
-		else
-			sk->sk_mark = val;
+			break;
+		}
+
+		__sock_set_mark(sk, val);
 		break;
 
 	case SO_RXQ_OVFL:
@@ -995 +1171 @@
 #endif
 
 	case SO_MAX_PACING_RATE:
-		if (val != ~0U)
+	{
+		unsigned long ulval = (val == ~0U) ? ~0UL : (unsigned int)val;
+
+		if (sizeof(ulval) != sizeof(val) &&
+		    optlen >= sizeof(ulval) &&
+		    copy_from_sockptr(&ulval, optval, sizeof(ulval))) {
+			ret = -EFAULT;
+			break;
+		}
+		if (ulval != ~0UL)
 			cmpxchg(&sk->sk_pacing_status,
 				SK_PACING_NONE,
 				SK_PACING_NEEDED);
-		sk->sk_max_pacing_rate = val;
-		sk->sk_pacing_rate = min(sk->sk_pacing_rate,
-					 sk->sk_max_pacing_rate);
+		sk->sk_max_pacing_rate = ulval;
+		sk->sk_pacing_rate = min(sk->sk_pacing_rate, ulval);
 		break;
-
+	}
 	case SO_INCOMING_CPU:
 		WRITE_ONCE(sk->sk_incoming_cpu, val);
 		break;
@@ -1015 +1199 @@
 
 	case SO_ZEROCOPY:
 		if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
-			if (sk->sk_protocol != IPPROTO_TCP)
+			if (!((sk->sk_type == SOCK_STREAM &&
+			       sk->sk_protocol == IPPROTO_TCP) ||
+			      (sk->sk_type == SOCK_DGRAM &&
+			       sk->sk_protocol == IPPROTO_UDP)))
 				ret = -ENOTSUPP;
 		} else if (sk->sk_family != PF_RDS) {
 			ret = -ENOTSUPP;
@@ -1029 +1216 @@
 		break;
 
 	case SO_TXTIME:
-		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
-			ret = -EPERM;
-		} else if (optlen != sizeof(struct sock_txtime)) {
+		if (optlen != sizeof(struct sock_txtime)) {
 			ret = -EINVAL;
-		} else if (copy_from_user(&sk_txtime, optval,
+			break;
+		} else if (copy_from_sockptr(&sk_txtime, optval,
 					   sizeof(struct sock_txtime))) {
 			ret = -EFAULT;
+			break;
 		} else if (sk_txtime.flags & ~SOF_TXTIME_FLAGS_MASK) {
 			ret = -EINVAL;
-		} else {
-			sock_valbool_flag(sk, SOCK_TXTIME, true);
-			sk->sk_clockid = sk_txtime.clockid;
-			sk->sk_txtime_deadline_mode =
-				!!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE);
-			sk->sk_txtime_report_errors =
-				!!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS);
+			break;
 		}
+		/* CLOCK_MONOTONIC is only used by sch_fq, and this packet
+		 * scheduler has enough safe guards.
+		 */
+		if (sk_txtime.clockid != CLOCK_MONOTONIC &&
+		    !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
+			ret = -EPERM;
+			break;
+		}
+		sock_valbool_flag(sk, SOCK_TXTIME, true);
+		sk->sk_clockid = sk_txtime.clockid;
+		sk->sk_txtime_deadline_mode =
+			!!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE);
+		sk->sk_txtime_report_errors =
+			!!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS);
+		break;
+
+	case SO_BINDTOIFINDEX:
+		ret = sock_bindtoindex_locked(sk, val);
+		break;
 
 	default:
```
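With the `SO_MAX_PACING_RATE` change, an 8-byte option value lets 64-bit kernels accept pacing rates beyond what a `u32` can express (~4.29 GB/s), while a 4-byte value keeps working and `~0U` still means "no limit". A hedged userspace sketch:

```c
#include <stdint.h>
#include <sys/socket.h>

/* Cap pacing at 10 GB/s by passing a 64-bit rate in bytes per second. */
static int set_pacing_10gbytes(int fd)
{
	uint64_t rate = 10ULL * 1000 * 1000 * 1000;

	return setsockopt(fd, SOL_SOCKET, SO_MAX_PACING_RATE,
			  &rate, sizeof(rate));
}
```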
```diff
@@ -1101 +1300 @@
 	union {
 		int val;
 		u64 val64;
+		unsigned long ulval;
 		struct linger ling;
-		struct timeval tm;
+		struct old_timeval32 tm32;
+		struct __kernel_old_timeval tm;
+		struct __kernel_sock_timeval stm;
 		struct sock_txtime txtime;
 	} v;
 
@@ -1186 +1388 @@
 		break;
 
 	case SO_BSDCOMPAT:
-		sock_warn_obsolete_bsdism("getsockopt");
 		break;
 
-	case SO_TIMESTAMP:
+	case SO_TIMESTAMP_OLD:
 		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
+				!sock_flag(sk, SOCK_TSTAMP_NEW) &&
 				!sock_flag(sk, SOCK_RCVTSTAMPNS);
 		break;
 
-	case SO_TIMESTAMPNS:
-		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
+	case SO_TIMESTAMPNS_OLD:
+		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && !sock_flag(sk, SOCK_TSTAMP_NEW);
 		break;
 
-	case SO_TIMESTAMPING:
+	case SO_TIMESTAMP_NEW:
+		v.val = sock_flag(sk, SOCK_RCVTSTAMP) && sock_flag(sk, SOCK_TSTAMP_NEW);
+		break;
+
+	case SO_TIMESTAMPNS_NEW:
+		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && sock_flag(sk, SOCK_TSTAMP_NEW);
+		break;
+
+	case SO_TIMESTAMPING_OLD:
 		v.val = sk->sk_tsflags;
 		break;
 
-	case SO_RCVTIMEO:
-		lv = sizeof(struct timeval);
-		if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
-			v.tm.tv_sec = 0;
-			v.tm.tv_usec = 0;
-		} else {
-			v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
-			v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * USEC_PER_SEC) / HZ;
-		}
+	case SO_RCVTIMEO_OLD:
+	case SO_RCVTIMEO_NEW:
+		lv = sock_get_timeout(sk->sk_rcvtimeo, &v, SO_RCVTIMEO_OLD == optname);
 		break;
 
-	case SO_SNDTIMEO:
-		lv = sizeof(struct timeval);
-		if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
-			v.tm.tv_sec = 0;
-			v.tm.tv_usec = 0;
-		} else {
-			v.tm.tv_sec = sk->sk_sndtimeo / HZ;
-			v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * USEC_PER_SEC) / HZ;
-		}
+	case SO_SNDTIMEO_OLD:
+	case SO_SNDTIMEO_NEW:
+		lv = sock_get_timeout(sk->sk_sndtimeo, &v, SO_SNDTIMEO_OLD == optname);
 		break;
 
 	case SO_RCVLOWAT:
```
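On the getsockopt side, `sock_get_timeout()` now picks the struct layout (`old_timeval32`, `__kernel_old_timeval`, or `__kernel_sock_timeval`) from the optname and the caller's ABI. A hedged userspace sketch of reading it back through the classic option, which matches the `_OLD` path here:

```c
#include <stdio.h>
#include <sys/socket.h>
#include <sys/time.h>

/* Print the current send timeout; 0.0 means "no timeout". */
static void print_sndtimeo(int fd)
{
	struct timeval tv;
	socklen_t len = sizeof(tv);

	if (!getsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, &len))
		printf("send timeout: %ld.%06lds\n",
		       (long)tv.tv_sec, (long)tv.tv_usec);
}
```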
```diff
@@ -1354 +1552 @@
 #endif
 
 	case SO_MAX_PACING_RATE:
-		v.val = sk->sk_max_pacing_rate;
+		if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) {
+			lv = sizeof(v.ulval);
+			v.ulval = sk->sk_max_pacing_rate;
+		} else {
+			/* 32bit version */
+			v.val = min_t(unsigned long, sk->sk_max_pacing_rate, ~0U);
+		}
 		break;
 
 	case SO_INCOMING_CPU:
@@ -1405 +1609 @@
 				SOF_TXTIME_REPORT_ERRORS : 0;
 		break;
 
+	case SO_BINDTOIFINDEX:
+		v.val = sk->sk_bound_dev_if;
+		break;
+
 	default:
 		/* We implement the SO_SNDLOWAT etc to not be settable
 		 * (1003.1g 7).
@@ -1452 +1660 @@
 	 */
 static void sock_copy(struct sock *nsk, const struct sock *osk)
 {
+	const struct proto *prot = READ_ONCE(osk->sk_prot);
 #ifdef CONFIG_SECURITY_NETWORK
 	void *sptr = nsk->sk_security;
 #endif
 	memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
 
 	memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
-	       osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
+	       prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
 
 #ifdef CONFIG_SECURITY_NETWORK
 	nsk->sk_security = sptr;
@@ -1584 +1793 @@
 
 	sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
 
+#ifdef CONFIG_BPF_SYSCALL
+	bpf_sk_storage_free(sk);
+#endif
+
 	if (atomic_read(&sk->sk_omem_alloc))
 		pr_debug("%s: optmem leakage (%d bytes) detected\n",
 			 __func__, atomic_read(&sk->sk_omem_alloc));
@@ -1670 +1883 @@
 	 */
 struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 {
-	struct sock *newsk;
+	struct proto *prot = READ_ONCE(sk->sk_prot);
+	struct sk_filter *filter;
 	bool is_charged = true;
+	struct sock *newsk;
 
-	newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
-	if (newsk != NULL) {
-		struct sk_filter *filter;
+	newsk = sk_prot_alloc(prot, priority, sk->sk_family);
+	if (!newsk)
+		goto out;
 
-		sock_copy(newsk, sk);
+	sock_copy(newsk, sk);
 
-		newsk->sk_prot_creator = sk->sk_prot;
+	newsk->sk_prot_creator = prot;
 
-		/* SANITY */
-		if (likely(newsk->sk_net_refcnt))
-			get_net(sock_net(newsk));
-		sk_node_init(&newsk->sk_node);
-		sock_lock_init(newsk);
-		bh_lock_sock(newsk);
-		newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
-		newsk->sk_backlog.len = 0;
-
-		atomic_set(&newsk->sk_rmem_alloc, 0);
-		/*
-		 * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
-		 */
-		refcount_set(&newsk->sk_wmem_alloc, 1);
-		atomic_set(&newsk->sk_omem_alloc, 0);
-		sk_init_common(newsk);
-
-		newsk->sk_dst_cache = NULL;
-		newsk->sk_dst_pending_confirm = 0;
-		newsk->sk_wmem_queued = 0;
-		newsk->sk_forward_alloc = 0;
-		atomic_set(&newsk->sk_drops, 0);
-		newsk->sk_send_head = NULL;
-		newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
-		atomic_set(&newsk->sk_zckey, 0);
-
-		sock_reset_flag(newsk, SOCK_DONE);
-
-		/* sk->sk_memcg will be populated at accept() time */
-		newsk->sk_memcg = NULL;
-
-		cgroup_sk_clone(&newsk->sk_cgrp_data);
-
-		rcu_read_lock();
-		filter = rcu_dereference(sk->sk_filter);
-		if (filter != NULL)
-			/* though it's an empty new sock, the charging may fail
-			 * if sysctl_optmem_max was changed between creation of
-			 * original socket and cloning
-			 */
-			is_charged = sk_filter_charge(newsk, filter);
-		RCU_INIT_POINTER(newsk->sk_filter, filter);
-		rcu_read_unlock();
-
-		if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
-			/* We need to make sure that we don't uncharge the new
-			 * socket if we couldn't charge it in the first place
-			 * as otherwise we uncharge the parent's filter.
-			 */
-			if (!is_charged)
-				RCU_INIT_POINTER(newsk->sk_filter, NULL);
-			sk_free_unlock_clone(newsk);
-			newsk = NULL;
-			goto out;
-		}
-		RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
-
-		newsk->sk_err	   = 0;
-		newsk->sk_err_soft = 0;
-		newsk->sk_priority = 0;
-		newsk->sk_incoming_cpu = raw_smp_processor_id();
-		atomic64_set(&newsk->sk_cookie, 0);
-		if (likely(newsk->sk_net_refcnt))
-			sock_inuse_add(sock_net(newsk), 1);
-
-		/*
-		 * Before updating sk_refcnt, we must commit prior changes to memory
-		 * (Documentation/RCU/rculist_nulls.txt for details)
-		 */
-		smp_wmb();
-		refcount_set(&newsk->sk_refcnt, 2);
-
-		/*
-		 * Increment the counter in the same struct proto as the master
-		 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
-		 * is the same as sk->sk_prot->socks, as this field was copied
-		 * with memcpy).
-		 *
-		 * This _changes_ the previous behaviour, where
-		 * tcp_create_openreq_child always was incrementing the
-		 * equivalent to tcp_prot->socks (inet_sock_nr), so this have
-		 * to be taken into account in all callers. -acme
-		 */
-		sk_refcnt_debug_inc(newsk);
-		sk_set_socket(newsk, NULL);
-		sk_tx_queue_clear(newsk);
-		newsk->sk_wq = NULL;
-
-		if (newsk->sk_prot->sockets_allocated)
-			sk_sockets_allocated_inc(newsk);
-
-		if (sock_needs_netstamp(sk) &&
-		    newsk->sk_flags & SK_FLAGS_TIMESTAMP)
-			net_enable_timestamp();
+	/* SANITY */
+	if (likely(newsk->sk_net_refcnt)) {
+		get_net(sock_net(newsk));
+		sock_inuse_add(sock_net(newsk), 1);
 	}
+	sk_node_init(&newsk->sk_node);
+	sock_lock_init(newsk);
+	bh_lock_sock(newsk);
+	newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
+	newsk->sk_backlog.len = 0;
+
+	atomic_set(&newsk->sk_rmem_alloc, 0);
+
+	/* sk_wmem_alloc set to one (see sk_free() and sock_wfree()) */
+	refcount_set(&newsk->sk_wmem_alloc, 1);
+
+	atomic_set(&newsk->sk_omem_alloc, 0);
+	sk_init_common(newsk);
+
+	newsk->sk_dst_cache = NULL;
+	newsk->sk_dst_pending_confirm = 0;
+	newsk->sk_wmem_queued = 0;
+	newsk->sk_forward_alloc = 0;
+	atomic_set(&newsk->sk_drops, 0);
+	newsk->sk_send_head = NULL;
+	newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
+	atomic_set(&newsk->sk_zckey, 0);
+
+	sock_reset_flag(newsk, SOCK_DONE);
+
+	/* sk->sk_memcg will be populated at accept() time */
+	newsk->sk_memcg = NULL;
+
+	cgroup_sk_clone(&newsk->sk_cgrp_data);
+
+	rcu_read_lock();
+	filter = rcu_dereference(sk->sk_filter);
+	if (filter != NULL)
+		/* though it's an empty new sock, the charging may fail
+		 * if sysctl_optmem_max was changed between creation of
+		 * original socket and cloning
+		 */
+		is_charged = sk_filter_charge(newsk, filter);
+	RCU_INIT_POINTER(newsk->sk_filter, filter);
+	rcu_read_unlock();
+
+	if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
+		/* We need to make sure that we don't uncharge the new
+		 * socket if we couldn't charge it in the first place
+		 * as otherwise we uncharge the parent's filter.
+		 */
+		if (!is_charged)
+			RCU_INIT_POINTER(newsk->sk_filter, NULL);
+		sk_free_unlock_clone(newsk);
+		newsk = NULL;
+		goto out;
+	}
+	RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
+
+	if (bpf_sk_storage_clone(sk, newsk)) {
+		sk_free_unlock_clone(newsk);
+		newsk = NULL;
+		goto out;
+	}
+
+	/* Clear sk_user_data if parent had the pointer tagged
+	 * as not suitable for copying when cloning.
+	 */
+	if (sk_user_data_is_nocopy(newsk))
+		newsk->sk_user_data = NULL;
+
+	newsk->sk_err	   = 0;
+	newsk->sk_err_soft = 0;
+	newsk->sk_priority = 0;
+	newsk->sk_incoming_cpu = raw_smp_processor_id();
+
+	/* Before updating sk_refcnt, we must commit prior changes to memory
+	 * (Documentation/RCU/rculist_nulls.rst for details)
+	 */
+	smp_wmb();
+	refcount_set(&newsk->sk_refcnt, 2);
+
+	/* Increment the counter in the same struct proto as the master
+	 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
+	 * is the same as sk->sk_prot->socks, as this field was copied
+	 * with memcpy).
+	 *
+	 * This _changes_ the previous behaviour, where
+	 * tcp_create_openreq_child always was incrementing the
+	 * equivalent to tcp_prot->socks (inet_sock_nr), so this have
+	 * to be taken into account in all callers. -acme
+	 */
+	sk_refcnt_debug_inc(newsk);
+	sk_set_socket(newsk, NULL);
+	sk_tx_queue_clear(newsk);
+	RCU_INIT_POINTER(newsk->sk_wq, NULL);
+
+	if (newsk->sk_prot->sockets_allocated)
+		sk_sockets_allocated_inc(newsk);
+
+	if (sock_needs_netstamp(sk) && newsk->sk_flags & SK_FLAGS_TIMESTAMP)
+		net_enable_timestamp();
 out:
 	return newsk;
 }
@@ -1877 +2099 @@
 }
 EXPORT_SYMBOL(skb_set_owner_w);
 
+static bool can_skb_orphan_partial(const struct sk_buff *skb)
+{
+#ifdef CONFIG_TLS_DEVICE
+	/* Drivers depend on in-order delivery for crypto offload,
+	 * partial orphan breaks out-of-order-OK logic.
+	 */
+	if (skb->decrypted)
+		return false;
+#endif
+	return (skb->destructor == sock_wfree ||
+		(IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree));
+}
+
 /* This helper is used by netem, as it can hold packets in its
  * delay queue. We want to allow the owner socket to send more
  * packets, as if they were already TX completed by a typical driver.
@@ -1888 +2123 @@
 	if (skb_is_tcp_pure_ack(skb))
 		return;
 
-	if (skb->destructor == sock_wfree
-#ifdef CONFIG_INET
-	    || skb->destructor == tcp_wfree
-#endif
-	    ) {
-		struct sock *sk = skb->sk;
+	if (can_skb_orphan_partial(skb) && skb_set_owner_sk_safe(skb, skb->sk))
+		return;
 
-		if (refcount_inc_not_zero(&sk->sk_refcnt)) {
-			WARN_ON(refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc));
-			skb->destructor = sock_efree;
-		}
-	} else {
-		skb_orphan(skb);
-	}
+	skb_orphan(skb);
 }
 EXPORT_SYMBOL(skb_orphan_partial);
 
@@ -1927 +2152 @@
 		sock_put(skb->sk);
 }
 EXPORT_SYMBOL(sock_efree);
+
+/* Buffer destructor for prefetch/receive path where reference count may
+ * not be held, e.g. for listen sockets.
+ */
+#ifdef CONFIG_INET
+void sock_pfree(struct sk_buff *skb)
+{
+	if (sk_is_refcounted(skb->sk))
+		sock_gen_put(skb->sk);
+}
+EXPORT_SYMBOL(sock_pfree);
+#endif /* CONFIG_INET */
 
 kuid_t sock_i_uid(struct sock *sk)
 {
@@ -1956 +2193 @@
 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
 			     gfp_t priority)
 {
-	if (force || refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
+	if (force ||
+	    refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf)) {
 		struct sk_buff *skb = alloc_skb(size, priority);
+
 		if (skb) {
 			skb_set_owner_w(skb, sk);
 			return skb;
@@ -1981 +2220 @@
 
 	/* small safe race: SKB_TRUESIZE may differ from final skb->truesize */
 	if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
-	    sysctl_optmem_max)
+	    READ_ONCE(sysctl_optmem_max))
 		return NULL;
 
 	skb = alloc_skb(size, priority);
@@ -1999 +2238 @@
  */
 void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
 {
-	if ((unsigned int)size <= sysctl_optmem_max &&
-	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
+	int optmem_max = READ_ONCE(sysctl_optmem_max);
+
+	if ((unsigned int)size <= optmem_max &&
+	    atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
 		void *mem;
 		/* First do the add, to avoid the race if kmalloc
 		 * might sleep.
@@ -2025 +2266 @@
 	if (WARN_ON_ONCE(!mem))
 		return;
 	if (nullify)
-		kzfree(mem);
+		kfree_sensitive(mem);
 	else
 		kfree(mem);
 	atomic_sub(size, &sk->sk_omem_alloc);
```
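Option memory charged by `sock_kmalloc` against the socket's optmem allowance must be released with the matching size; the nullify path (`sock_kzfree_s`) now wipes the buffer via `kfree_sensitive()` rather than the removed `kzfree()`. A hedged kernel-side sketch (the function and its purpose are illustrative):

```c
/* Stash a secret in per-socket option memory, then free it with
 * the zeroing variant so the key is wiped before the uncharge.
 */
static int example_stash_key(struct sock *sk, const u8 *key, int len)
{
	u8 *copy = sock_kmalloc(sk, len, GFP_KERNEL);

	if (!copy)
		return -ENOMEM;
	memcpy(copy, key, len);
	/* ... use copy ... */
	sock_kzfree_s(sk, copy, len);	/* zeroes, then uncharges sk_omem_alloc */
	return 0;
}
```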
| .. | .. |
|---|
| 2058 | 2299 | break; |
|---|
| 2059 | 2300 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); |
|---|
| 2060 | 2301 | prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); |
|---|
| 2061 | | - if (refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) |
|---|
| 2302 | + if (refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf)) |
|---|
| 2062 | 2303 | break; |
|---|
| 2063 | 2304 | if (sk->sk_shutdown & SEND_SHUTDOWN) |
|---|
| 2064 | 2305 | break; |
|---|
| .. | .. |
|---|
| 2093 | 2334 | if (sk->sk_shutdown & SEND_SHUTDOWN) |
|---|
| 2094 | 2335 | goto failure; |
|---|
| 2095 | 2336 | |
|---|
| 2096 | | - if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf) |
|---|
| 2337 | + if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf)) |
|---|
| 2097 | 2338 | break; |
|---|
| 2098 | 2339 | |
|---|
| 2099 | 2340 | sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); |
|---|
| .. | .. |
|---|
| 2139 | 2380 | return -EINVAL; |
|---|
| 2140 | 2381 | sockc->mark = *(u32 *)CMSG_DATA(cmsg); |
|---|
| 2141 | 2382 | break; |
|---|
| 2142 | | - case SO_TIMESTAMPING: |
|---|
| 2383 | + case SO_TIMESTAMPING_OLD: |
|---|
| 2143 | 2384 | if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) |
|---|
| 2144 | 2385 | return -EINVAL; |
|---|
| 2145 | 2386 | |
|---|
| .. | .. |
|---|
| 2207 | 2448 | } |
|---|
| 2208 | 2449 | } |
|---|
| 2209 | 2450 | |
|---|
| 2210 | | -/* On 32bit arches, an skb frag is limited to 2^15 */ |
|---|
| 2211 | 2451 | #define SKB_FRAG_PAGE_ORDER get_order(32768) |
|---|
| 2452 | +DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key); |
|---|
| 2212 | 2453 | |
|---|
| 2213 | 2454 | /** |
|---|
| 2214 | 2455 | * skb_page_frag_refill - check that a page_frag contains enough room |
|---|
| .. | .. |
|---|
| 2233 | 2474 | } |
|---|
| 2234 | 2475 | |
|---|
| 2235 | 2476 | pfrag->offset = 0; |
|---|
| 2236 | | - if (SKB_FRAG_PAGE_ORDER) { |
|---|
| 2477 | + if (SKB_FRAG_PAGE_ORDER && |
|---|
| 2478 | + !static_branch_unlikely(&net_high_order_alloc_disable_key)) { |
|---|
| 2237 | 2479 | /* Avoid direct reclaim but allow kswapd to wake */ |
|---|
| 2238 | 2480 | pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) | |
|---|
| 2239 | 2481 | __GFP_COMP | __GFP_NOWARN | |
|---|
| .. | .. |
|---|
| 2263 | 2505 | return false; |
|---|
| 2264 | 2506 | } |
|---|
| 2265 | 2507 | EXPORT_SYMBOL(sk_page_frag_refill); |
|---|
| 2266 | | - |
|---|
| 2267 | | -int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg, |
|---|
| 2268 | | - int sg_start, int *sg_curr_index, unsigned int *sg_curr_size, |
|---|
| 2269 | | - int first_coalesce) |
|---|
| 2270 | | -{ |
|---|
| 2271 | | - int sg_curr = *sg_curr_index, use = 0, rc = 0; |
|---|
| 2272 | | - unsigned int size = *sg_curr_size; |
|---|
| 2273 | | - struct page_frag *pfrag; |
|---|
| 2274 | | - struct scatterlist *sge; |
|---|
| 2275 | | - |
|---|
| 2276 | | - len -= size; |
|---|
| 2277 | | - pfrag = sk_page_frag(sk); |
|---|
| 2278 | | - |
|---|
| 2279 | | - while (len > 0) { |
|---|
| 2280 | | - unsigned int orig_offset; |
|---|
| 2281 | | - |
|---|
| 2282 | | - if (!sk_page_frag_refill(sk, pfrag)) { |
|---|
| 2283 | | - rc = -ENOMEM; |
|---|
| 2284 | | - goto out; |
|---|
| 2285 | | - } |
|---|
| 2286 | | - |
|---|
| 2287 | | - use = min_t(int, len, pfrag->size - pfrag->offset); |
|---|
| 2288 | | - |
|---|
| 2289 | | - if (!sk_wmem_schedule(sk, use)) { |
|---|
| 2290 | | - rc = -ENOMEM; |
|---|
| 2291 | | - goto out; |
|---|
| 2292 | | - } |
|---|
| 2293 | | - |
|---|
| 2294 | | - sk_mem_charge(sk, use); |
|---|
| 2295 | | - size += use; |
|---|
| 2296 | | - orig_offset = pfrag->offset; |
|---|
| 2297 | | - pfrag->offset += use; |
|---|
| 2298 | | - |
|---|
| 2299 | | - sge = sg + sg_curr - 1; |
|---|
| 2300 | | - if (sg_curr > first_coalesce && sg_page(sge) == pfrag->page && |
|---|
| 2301 | | - sge->offset + sge->length == orig_offset) { |
|---|
| 2302 | | - sge->length += use; |
|---|
| 2303 | | - } else { |
|---|
| 2304 | | - sge = sg + sg_curr; |
|---|
| 2305 | | - sg_unmark_end(sge); |
|---|
| 2306 | | - sg_set_page(sge, pfrag->page, use, orig_offset); |
|---|
| 2307 | | - get_page(pfrag->page); |
|---|
| 2308 | | - sg_curr++; |
|---|
| 2309 | | - |
|---|
| 2310 | | - if (sg_curr == MAX_SKB_FRAGS) |
|---|
| 2311 | | - sg_curr = 0; |
|---|
| 2312 | | - |
|---|
| 2313 | | - if (sg_curr == sg_start) { |
|---|
| 2314 | | - rc = -ENOSPC; |
|---|
| 2315 | | - break; |
|---|
| 2316 | | - } |
|---|
| 2317 | | - } |
|---|
| 2318 | | - |
|---|
| 2319 | | - len -= use; |
|---|
| 2320 | | - } |
|---|
| 2321 | | -out: |
|---|
| 2322 | | - *sg_curr_size = size; |
|---|
| 2323 | | - *sg_curr_index = sg_curr; |
|---|
| 2324 | | - return rc; |
|---|
| 2325 | | -} |
|---|
| 2326 | | -EXPORT_SYMBOL(sk_alloc_sg); |
|---|
| 2327 | 2508 | |
|---|
| 2328 | 2509 | static void __lock_sock(struct sock *sk) |
|---|
| 2329 | 2510 | __releases(&sk->sk_lock.slock) |
|---|
| .. | .. |
|---|
| 2358 | 2539 | next = skb->next; |
|---|
| 2359 | 2540 | prefetch(next); |
|---|
| 2360 | 2541 | WARN_ON_ONCE(skb_dst_is_noref(skb)); |
|---|
| 2361 | | - skb->next = NULL; |
|---|
| 2542 | + skb_mark_not_on_list(skb); |
|---|
| 2362 | 2543 | sk_backlog_rcv(sk, skb); |
|---|
| 2363 | 2544 | |
|---|
| 2364 | 2545 | cond_resched(); |
|---|
| .. | .. |
|---|
| 2614 | 2795 | } |
|---|
| 2615 | 2796 | EXPORT_SYMBOL(sock_no_shutdown); |
|---|
| 2616 | 2797 | |
|---|
| 2617 | | -int sock_no_setsockopt(struct socket *sock, int level, int optname, |
|---|
| 2618 | | - char __user *optval, unsigned int optlen) |
|---|
| 2619 | | -{ |
|---|
| 2620 | | - return -EOPNOTSUPP; |
|---|
| 2621 | | -} |
|---|
| 2622 | | -EXPORT_SYMBOL(sock_no_setsockopt); |
|---|
| 2623 | | - |
|---|
| 2624 | | -int sock_no_getsockopt(struct socket *sock, int level, int optname, |
|---|
| 2625 | | - char __user *optval, int __user *optlen) |
|---|
| 2626 | | -{ |
|---|
| 2627 | | - return -EOPNOTSUPP; |
|---|
| 2628 | | -} |
|---|
| 2629 | | -EXPORT_SYMBOL(sock_no_getsockopt); |
|---|
| 2630 | | - |
|---|
| 2631 | 2798 | int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len) |
|---|
| 2632 | 2799 | { |
|---|
| 2633 | 2800 | return -EOPNOTSUPP; |
|---|
| .. | .. |
|---|
| 2732 | 2899 | rcu_read_unlock(); |
|---|
| 2733 | 2900 | } |
|---|
| 2734 | 2901 | |
|---|
| 2735 | | -static void sock_def_readable(struct sock *sk) |
|---|
| 2902 | +void sock_def_readable(struct sock *sk) |
|---|
| 2736 | 2903 | { |
|---|
| 2737 | 2904 | struct socket_wq *wq; |
|---|
| 2738 | 2905 | |
|---|
| 2739 | 2906 | rcu_read_lock(); |
|---|
| 2740 | 2907 | wq = rcu_dereference(sk->sk_wq); |
|---|
| 2741 | | - if (skwq_has_sleeper(wq)) |
|---|
| 2908 | + |
|---|
| 2909 | + if (skwq_has_sleeper(wq)) { |
|---|
| 2910 | + int done = 0; |
|---|
| 2911 | + |
|---|
| 2912 | + trace_android_vh_do_wake_up_sync(&wq->wait, &done); |
|---|
| 2913 | + if (done) |
|---|
| 2914 | + goto out; |
|---|
| 2915 | + |
|---|
| 2742 | 2916 | wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI | |
|---|
| 2743 | 2917 | EPOLLRDNORM | EPOLLRDBAND); |
|---|
| 2918 | + } |
|---|
| 2919 | + |
|---|
| 2920 | +out: |
|---|
| 2744 | 2921 | sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); |
|---|
| 2745 | 2922 | rcu_read_unlock(); |
|---|
| 2746 | 2923 | } |
|---|
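Protocols do not call sock_def_readable() directly: they queue data and invoke the sk_data_ready callback, which points here unless the protocol installed its own handler. A minimal sketch of the producer side (my_deliver() is illustrative):

```c
static void my_deliver(struct sock *sk, struct sk_buff *skb)
{
	skb_queue_tail(&sk->sk_receive_queue, skb);
	sk->sk_data_ready(sk);	/* typically sock_def_readable() */
}
```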
| .. | .. |
|---|
| 2754 | 2931 | /* Do not wake up a writer until he can make "significant" |
|---|
| 2755 | 2932 | * progress. --DaveM |
|---|
| 2756 | 2933 | */ |
|---|
| 2757 | | - if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { |
|---|
| 2934 | + if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= READ_ONCE(sk->sk_sndbuf)) { |
|---|
| 2758 | 2935 | wq = rcu_dereference(sk->sk_wq); |
|---|
| 2759 | 2936 | if (skwq_has_sleeper(wq)) |
|---|
| 2760 | 2937 | wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT | |
|---|
| .. | .. |
|---|
| 2795 | 2972 | } |
|---|
| 2796 | 2973 | EXPORT_SYMBOL(sk_stop_timer); |
|---|
| 2797 | 2974 | |
|---|
| 2975 | +void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer) |
|---|
| 2976 | +{ |
|---|
| 2977 | + if (del_timer_sync(timer)) |
|---|
| 2978 | + __sock_put(sk); |
|---|
| 2979 | +} |
|---|
| 2980 | +EXPORT_SYMBOL(sk_stop_timer_sync); |
|---|
| 2981 | + |
|---|
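A hedged sketch of how a protocol would pair these helpers. sk_reset_timer() takes a socket reference when it arms a previously idle timer; sk_stop_timer_sync() cancels the timer, waits for a concurrently running handler to return, and only then drops that reference, so it must not be called from the handler itself or under a lock the handler takes. The my_proto_* functions are illustrative:

```c
static void my_proto_start_keepalive(struct sock *sk)
{
	/* grabs a sock reference iff the timer was not already pending */
	sk_reset_timer(sk, &sk->sk_timer, jiffies + 10 * HZ);
}

static void my_proto_destroy(struct sock *sk)
{
	/* cancel and wait out a concurrently running handler */
	sk_stop_timer_sync(sk, &sk->sk_timer);
}
```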
| 2798 | 2982 | void sock_init_data(struct socket *sock, struct sock *sk) |
|---|
| 2799 | 2983 | { |
|---|
| 2800 | 2984 | sk_init_common(sk); |
|---|
| .. | .. |
|---|
| 2803 | 2987 | timer_setup(&sk->sk_timer, NULL, 0); |
|---|
| 2804 | 2988 | |
|---|
| 2805 | 2989 | sk->sk_allocation = GFP_KERNEL; |
|---|
| 2806 | | - sk->sk_rcvbuf = sysctl_rmem_default; |
|---|
| 2807 | | - sk->sk_sndbuf = sysctl_wmem_default; |
|---|
| 2990 | + sk->sk_rcvbuf = READ_ONCE(sysctl_rmem_default); |
|---|
| 2991 | + sk->sk_sndbuf = READ_ONCE(sysctl_wmem_default); |
|---|
| 2808 | 2992 | sk->sk_state = TCP_CLOSE; |
|---|
| 2809 | 2993 | sk_set_socket(sk, sock); |
|---|
| 2810 | 2994 | |
|---|
| .. | .. |
|---|
| 2812 | 2996 | |
|---|
| 2813 | 2997 | if (sock) { |
|---|
| 2814 | 2998 | sk->sk_type = sock->type; |
|---|
| 2815 | | - sk->sk_wq = sock->wq; |
|---|
| 2999 | + RCU_INIT_POINTER(sk->sk_wq, &sock->wq); |
|---|
| 2816 | 3000 | sock->sk = sk; |
|---|
| 2817 | 3001 | sk->sk_uid = SOCK_INODE(sock)->i_uid; |
|---|
| 2818 | 3002 | } else { |
|---|
| 2819 | | - sk->sk_wq = NULL; |
|---|
| 3003 | + RCU_INIT_POINTER(sk->sk_wq, NULL); |
|---|
| 2820 | 3004 | sk->sk_uid = make_kuid(sock_net(sk)->user_ns, 0); |
|---|
| 2821 | 3005 | } |
|---|
| 2822 | 3006 | |
|---|
| .. | .. |
|---|
| 2859 | 3043 | |
|---|
| 2860 | 3044 | #ifdef CONFIG_NET_RX_BUSY_POLL |
|---|
| 2861 | 3045 | sk->sk_napi_id = 0; |
|---|
| 2862 | | - sk->sk_ll_usec = sysctl_net_busy_read; |
|---|
| 3046 | + sk->sk_ll_usec = READ_ONCE(sysctl_net_busy_read); |
|---|
| 2863 | 3047 | #endif |
|---|
| 2864 | 3048 | |
|---|
| 2865 | | - sk->sk_max_pacing_rate = ~0U; |
|---|
| 2866 | | - sk->sk_pacing_rate = ~0U; |
|---|
| 2867 | | - sk->sk_pacing_shift = 10; |
|---|
| 3049 | + sk->sk_max_pacing_rate = ~0UL; |
|---|
| 3050 | + sk->sk_pacing_rate = ~0UL; |
|---|
| 3051 | + WRITE_ONCE(sk->sk_pacing_shift, 10); |
|---|
| 2868 | 3052 | sk->sk_incoming_cpu = -1; |
|---|
| 2869 | 3053 | |
|---|
| 2870 | 3054 | sk_rx_queue_clear(sk); |
|---|
| 2871 | 3055 | /* |
|---|
| 2872 | 3056 | * Before updating sk_refcnt, we must commit prior changes to memory |
|---|
| 2873 | | - * (Documentation/RCU/rculist_nulls.txt for details) |
|---|
| 3057 | + * (Documentation/RCU/rculist_nulls.rst for details) |
|---|
| 2874 | 3058 | */ |
|---|
| 2875 | 3059 | smp_wmb(); |
|---|
| 2876 | 3060 | refcount_set(&sk->sk_refcnt, 1); |
|---|
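The smp_wmb()/refcount_set() pair above is the publish half of the pattern described in the referenced rculist_nulls document. For protocols whose lookup tables use SLAB_TYPESAFE_BY_RCU, the read side must take a reference with refcount_inc_not_zero() and then revalidate the keys, since the object can be freed and recycled under a reader. A hedged sketch, with my_lookup() and the bucket walk purely illustrative:

```c
static struct sock *my_lookup(struct my_table *tbl, u32 key)
{
	struct sock *sk;

	rcu_read_lock();
	sk = __my_bucket_search(tbl, key);	/* hypothetical lockless walk */
	if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
		sk = NULL;	/* lost the race with the final put */
	/* on success the caller re-checks the keys: the slot may be recycled */
	rcu_read_unlock();
	return sk;
}
```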
| .. | .. |
|---|
| 2949 | 3133 | } |
|---|
| 2950 | 3134 | EXPORT_SYMBOL(lock_sock_fast); |
|---|
| 2951 | 3135 | |
|---|
| 2952 | | -int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) |
|---|
| 3136 | +int sock_gettstamp(struct socket *sock, void __user *userstamp, |
|---|
| 3137 | + bool timeval, bool time32) |
|---|
| 2953 | 3138 | { |
|---|
| 2954 | | - struct timeval tv; |
|---|
| 3139 | + struct sock *sk = sock->sk; |
|---|
| 3140 | + struct timespec64 ts; |
|---|
| 2955 | 3141 | |
|---|
| 2956 | 3142 | sock_enable_timestamp(sk, SOCK_TIMESTAMP); |
|---|
| 2957 | | - tv = ktime_to_timeval(sock_read_timestamp(sk)); |
|---|
| 2958 | | - if (tv.tv_sec == -1) |
|---|
| 2959 | | - return -ENOENT; |
|---|
| 2960 | | - if (tv.tv_sec == 0) { |
|---|
| 2961 | | - ktime_t kt = ktime_get_real(); |
|---|
| 2962 | | - sock_write_timestamp(sk, kt); |
|---|
| 2963 | | - tv = ktime_to_timeval(kt); |
|---|
| 2964 | | - } |
|---|
| 2965 | | - return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0; |
|---|
| 2966 | | -} |
|---|
| 2967 | | -EXPORT_SYMBOL(sock_get_timestamp); |
|---|
| 2968 | | - |
|---|
| 2969 | | -int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp) |
|---|
| 2970 | | -{ |
|---|
| 2971 | | - struct timespec ts; |
|---|
| 2972 | | - |
|---|
| 2973 | | - sock_enable_timestamp(sk, SOCK_TIMESTAMP); |
|---|
| 2974 | | - ts = ktime_to_timespec(sock_read_timestamp(sk)); |
|---|
| 3143 | + ts = ktime_to_timespec64(sock_read_timestamp(sk)); |
|---|
| 2975 | 3144 | if (ts.tv_sec == -1) |
|---|
| 2976 | 3145 | return -ENOENT; |
|---|
| 2977 | 3146 | if (ts.tv_sec == 0) { |
|---|
| 2978 | 3147 | ktime_t kt = ktime_get_real(); |
|---|
| 2979 | 3148 | sock_write_timestamp(sk, kt); |
|---|
| 2980 | | - ts = ktime_to_timespec(sk->sk_stamp); |
|---|
| 3149 | + ts = ktime_to_timespec64(kt); |
|---|
| 2981 | 3150 | } |
|---|
| 2982 | | - return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0; |
|---|
| 2983 | | -} |
|---|
| 2984 | | -EXPORT_SYMBOL(sock_get_timestampns); |
|---|
| 2985 | 3151 | |
|---|
| 2986 | | -void sock_enable_timestamp(struct sock *sk, int flag) |
|---|
| 3152 | + if (timeval) |
|---|
| 3153 | + ts.tv_nsec /= 1000; |
|---|
| 3154 | + |
|---|
| 3155 | +#ifdef CONFIG_COMPAT_32BIT_TIME |
|---|
| 3156 | + if (time32) |
|---|
| 3157 | + return put_old_timespec32(&ts, userstamp); |
|---|
| 3158 | +#endif |
|---|
| 3159 | +#ifdef CONFIG_SPARC64 |
|---|
| 3160 | + /* beware of padding in sparc64 timeval */ |
|---|
| 3161 | + if (timeval && !in_compat_syscall()) { |
|---|
| 3162 | + struct __kernel_old_timeval __user tv = { |
|---|
| 3163 | + .tv_sec = ts.tv_sec, |
|---|
| 3164 | + .tv_usec = ts.tv_nsec, |
|---|
| 3165 | + }; |
|---|
| 3166 | + if (copy_to_user(userstamp, &tv, sizeof(tv))) |
|---|
| 3167 | + return -EFAULT; |
|---|
| 3168 | + return 0; |
|---|
| 3169 | + } |
|---|
| 3170 | +#endif |
|---|
| 3171 | + return put_timespec64(&ts, userstamp); |
|---|
| 3172 | +} |
|---|
| 3173 | +EXPORT_SYMBOL(sock_gettstamp); |
|---|
| 3174 | + |
|---|
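sock_gettstamp() folds the legacy stamp queries into one helper: timeval selects the microsecond rather than nanosecond layout, and time32 selects the 32-bit time_t ABI. A hedged sketch, loosely modelled on the SIOCGSTAMP*/SIOCGSTAMPNS* dispatch in net/socket.c (my_gettstamp_ioctl() is illustrative):

```c
static int my_gettstamp_ioctl(struct socket *sock, unsigned int cmd,
			      void __user *argp)
{
	if (!sock->ops->gettstamp)
		return -ENOIOCTLCMD;
	return sock->ops->gettstamp(sock, argp,
				    cmd == SIOCGSTAMP_OLD,	 /* timeval layout? */
				    !IS_ENABLED(CONFIG_64BIT));	 /* 32-bit time_t? */
}
```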
| 3175 | +void sock_enable_timestamp(struct sock *sk, enum sock_flags flag) |
|---|
| 2987 | 3176 | { |
|---|
| 2988 | 3177 | if (!sock_flag(sk, flag)) { |
|---|
| 2989 | 3178 | unsigned long previous_flags = sk->sk_flags; |
|---|
| .. | .. |
|---|
| 3052 | 3241 | } |
|---|
| 3053 | 3242 | EXPORT_SYMBOL(sock_common_getsockopt); |
|---|
| 3054 | 3243 | |
|---|
| 3055 | | -#ifdef CONFIG_COMPAT |
|---|
| 3056 | | -int compat_sock_common_getsockopt(struct socket *sock, int level, int optname, |
|---|
| 3057 | | - char __user *optval, int __user *optlen) |
|---|
| 3058 | | -{ |
|---|
| 3059 | | - struct sock *sk = sock->sk; |
|---|
| 3060 | | - |
|---|
| 3061 | | - if (sk->sk_prot->compat_getsockopt != NULL) |
|---|
| 3062 | | - return sk->sk_prot->compat_getsockopt(sk, level, optname, |
|---|
| 3063 | | - optval, optlen); |
|---|
| 3064 | | - return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen); |
|---|
| 3065 | | -} |
|---|
| 3066 | | -EXPORT_SYMBOL(compat_sock_common_getsockopt); |
|---|
| 3067 | | -#endif |
|---|
| 3068 | | - |
|---|
| 3069 | 3244 | int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, |
|---|
| 3070 | 3245 | int flags) |
|---|
| 3071 | 3246 | { |
|---|
| .. | .. |
|---|
| 3085 | 3260 | * Set socket options on an inet socket. |
|---|
| 3086 | 3261 | */ |
|---|
| 3087 | 3262 | int sock_common_setsockopt(struct socket *sock, int level, int optname, |
|---|
| 3088 | | - char __user *optval, unsigned int optlen) |
|---|
| 3263 | + sockptr_t optval, unsigned int optlen) |
|---|
| 3089 | 3264 | { |
|---|
| 3090 | 3265 | struct sock *sk = sock->sk; |
|---|
| 3091 | 3266 | |
|---|
| .. | .. |
|---|
| 3093 | 3268 | } |
|---|
| 3094 | 3269 | EXPORT_SYMBOL(sock_common_setsockopt); |
|---|
| 3095 | 3270 | |
|---|
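The sockptr_t argument abstracts over user and kernel buffers, which is what makes the dedicated compat handlers below removable: compat and in-kernel callers wrap their buffer (e.g. with KERNEL_SOCKPTR()) and share one path. A hedged sketch of a protocol handler consuming it (my_proto_setsockopt() is illustrative):

```c
static int my_proto_setsockopt(struct sock *sk, int level, int optname,
			       sockptr_t optval, unsigned int optlen)
{
	int val;

	if (optlen < sizeof(val))
		return -EINVAL;
	if (copy_from_sockptr(&val, optval, sizeof(val)))
		return -EFAULT;	/* handles user and kernel pointers alike */
	/* ... validate and apply val, typically under lock_sock(sk) ... */
	return 0;
}
```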
| 3096 | | -#ifdef CONFIG_COMPAT |
|---|
| 3097 | | -int compat_sock_common_setsockopt(struct socket *sock, int level, int optname, |
|---|
| 3098 | | - char __user *optval, unsigned int optlen) |
|---|
| 3099 | | -{ |
|---|
| 3100 | | - struct sock *sk = sock->sk; |
|---|
| 3101 | | - |
|---|
| 3102 | | - if (sk->sk_prot->compat_setsockopt != NULL) |
|---|
| 3103 | | - return sk->sk_prot->compat_setsockopt(sk, level, optname, |
|---|
| 3104 | | - optval, optlen); |
|---|
| 3105 | | - return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen); |
|---|
| 3106 | | -} |
|---|
| 3107 | | -EXPORT_SYMBOL(compat_sock_common_setsockopt); |
|---|
| 3108 | | -#endif |
|---|
| 3109 | | - |
|---|
| 3110 | 3271 | void sk_common_release(struct sock *sk) |
|---|
| 3111 | 3272 | { |
|---|
| 3112 | 3273 | if (sk->sk_prot->destroy) |
|---|
| 3113 | 3274 | sk->sk_prot->destroy(sk); |
|---|
| 3114 | 3275 | |
|---|
| 3115 | 3276 | /* |
|---|
| 3116 | | - * Observation: when sock_common_release is called, processes have |
|---|
| 3277 | + * Observation: when sk_common_release is called, processes have |
|---|
| 3117 | 3278 | * no access to socket. But net still has. |
|---|
| 3118 | 3279 | * Step one, detach it from networking: |
|---|
| 3119 | 3280 | * |
|---|
| .. | .. |
|---|
| 3149 | 3310 | memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS); |
|---|
| 3150 | 3311 | |
|---|
| 3151 | 3312 | mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk); |
|---|
| 3152 | | - mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf; |
|---|
| 3313 | + mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf); |
|---|
| 3153 | 3314 | mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk); |
|---|
| 3154 | | - mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf; |
|---|
| 3315 | + mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf); |
|---|
| 3155 | 3316 | mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc; |
|---|
| 3156 | | - mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued; |
|---|
| 3317 | + mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued); |
|---|
| 3157 | 3318 | mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc); |
|---|
| 3158 | | - mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len; |
|---|
| 3319 | + mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len); |
|---|
| 3159 | 3320 | mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops); |
|---|
| 3160 | 3321 | } |
|---|
| 3161 | 3322 | |
|---|
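The READ_ONCE() annotations added here mark lockless loads of fields whose writers update them with WRITE_ONCE() (usually under the socket lock); the pairing stops the compiler from tearing, fusing, or refetching either access. In miniature, with both helpers illustrative:

```c
/* Writer side, e.g. the SO_SNDBUF path, runs under lock_sock(): */
static void my_set_sndbuf(struct sock *sk, int val)
{
	WRITE_ONCE(sk->sk_sndbuf, val);
}

/* Lockless reader, as in sk_get_meminfo() above: */
static int my_read_sndbuf(const struct sock *sk)
{
	return READ_ONCE(sk->sk_sndbuf);
}
```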
| .. | .. |
|---|
| 3240 | 3401 | |
|---|
| 3241 | 3402 | core_initcall(net_inuse_init); |
|---|
| 3242 | 3403 | |
|---|
| 3243 | | -static void assign_proto_idx(struct proto *prot) |
|---|
| 3404 | +static int assign_proto_idx(struct proto *prot) |
|---|
| 3244 | 3405 | { |
|---|
| 3245 | 3406 | prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR); |
|---|
| 3246 | 3407 | |
|---|
| 3247 | 3408 | if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) { |
|---|
| 3248 | 3409 | pr_err("PROTO_INUSE_NR exhausted\n"); |
|---|
| 3249 | | - return; |
|---|
| 3410 | + return -ENOSPC; |
|---|
| 3250 | 3411 | } |
|---|
| 3251 | 3412 | |
|---|
| 3252 | 3413 | set_bit(prot->inuse_idx, proto_inuse_idx); |
|---|
| 3414 | + return 0; |
|---|
| 3253 | 3415 | } |
|---|
| 3254 | 3416 | |
|---|
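A subtlety worth noting in assign_proto_idx(): by refusing index PROTO_INUSE_NR - 1, the bitmap can never fill completely, so find_first_zero_bit() always returns an in-range bit and the fully-full result (PROTO_INUSE_NR) is unreachable; the last slot acts as a sentinel. Recapped, this is the logic above, not new code:

```c
idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
if (idx == PROTO_INUSE_NR - 1)	/* last slot reserved, never set */
	return -ENOSPC;
set_bit(idx, proto_inuse_idx);
```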
| 3255 | 3417 | static void release_proto_idx(struct proto *prot) |
|---|
| .. | .. |
|---|
| 3258 | 3420 | clear_bit(prot->inuse_idx, proto_inuse_idx); |
|---|
| 3259 | 3421 | } |
|---|
| 3260 | 3422 | #else |
|---|
| 3261 | | -static inline void assign_proto_idx(struct proto *prot) |
|---|
| 3423 | +static inline int assign_proto_idx(struct proto *prot) |
|---|
| 3262 | 3424 | { |
|---|
| 3425 | + return 0; |
|---|
| 3263 | 3426 | } |
|---|
| 3264 | 3427 | |
|---|
| 3265 | 3428 | static inline void release_proto_idx(struct proto *prot) |
|---|
| .. | .. |
|---|
| 3270 | 3433 | { |
|---|
| 3271 | 3434 | } |
|---|
| 3272 | 3435 | #endif |
|---|
| 3436 | + |
|---|
| 3437 | +static void tw_prot_cleanup(struct timewait_sock_ops *twsk_prot) |
|---|
| 3438 | +{ |
|---|
| 3439 | + if (!twsk_prot) |
|---|
| 3440 | + return; |
|---|
| 3441 | + kfree(twsk_prot->twsk_slab_name); |
|---|
| 3442 | + twsk_prot->twsk_slab_name = NULL; |
|---|
| 3443 | + kmem_cache_destroy(twsk_prot->twsk_slab); |
|---|
| 3444 | + twsk_prot->twsk_slab = NULL; |
|---|
| 3445 | +} |
|---|
| 3273 | 3446 | |
|---|
| 3274 | 3447 | static void req_prot_cleanup(struct request_sock_ops *rsk_prot) |
|---|
| 3275 | 3448 | { |
|---|
| .. | .. |
|---|
| 3308 | 3481 | |
|---|
| 3309 | 3482 | int proto_register(struct proto *prot, int alloc_slab) |
|---|
| 3310 | 3483 | { |
|---|
| 3484 | + int ret = -ENOBUFS; |
|---|
| 3485 | + |
|---|
| 3311 | 3486 | if (alloc_slab) { |
|---|
| 3312 | 3487 | prot->slab = kmem_cache_create_usercopy(prot->name, |
|---|
| 3313 | 3488 | prot->obj_size, 0, |
|---|
| .. | .. |
|---|
| 3339 | 3514 | prot->slab_flags, |
|---|
| 3340 | 3515 | NULL); |
|---|
| 3341 | 3516 | if (prot->twsk_prot->twsk_slab == NULL) |
|---|
| 3342 | | - goto out_free_timewait_sock_slab_name; |
|---|
| 3517 | + goto out_free_timewait_sock_slab; |
|---|
| 3343 | 3518 | } |
|---|
| 3344 | 3519 | } |
|---|
| 3345 | 3520 | |
|---|
| 3346 | 3521 | mutex_lock(&proto_list_mutex); |
|---|
| 3522 | + ret = assign_proto_idx(prot); |
|---|
| 3523 | + if (ret) { |
|---|
| 3524 | + mutex_unlock(&proto_list_mutex); |
|---|
| 3525 | + goto out_free_timewait_sock_slab; |
|---|
| 3526 | + } |
|---|
| 3347 | 3527 | list_add(&prot->node, &proto_list); |
|---|
| 3348 | | - assign_proto_idx(prot); |
|---|
| 3349 | 3528 | mutex_unlock(&proto_list_mutex); |
|---|
| 3350 | | - return 0; |
|---|
| 3529 | + return ret; |
|---|
| 3351 | 3530 | |
|---|
| 3352 | | -out_free_timewait_sock_slab_name: |
|---|
| 3353 | | - kfree(prot->twsk_prot->twsk_slab_name); |
|---|
| 3531 | +out_free_timewait_sock_slab: |
|---|
| 3532 | + if (alloc_slab && prot->twsk_prot) |
|---|
| 3533 | + tw_prot_cleanup(prot->twsk_prot); |
|---|
| 3354 | 3534 | out_free_request_sock_slab: |
|---|
| 3355 | | - req_prot_cleanup(prot->rsk_prot); |
|---|
| 3535 | + if (alloc_slab) { |
|---|
| 3536 | + req_prot_cleanup(prot->rsk_prot); |
|---|
| 3356 | 3537 | |
|---|
| 3357 | | - kmem_cache_destroy(prot->slab); |
|---|
| 3358 | | - prot->slab = NULL; |
|---|
| 3538 | + kmem_cache_destroy(prot->slab); |
|---|
| 3539 | + prot->slab = NULL; |
|---|
| 3540 | + } |
|---|
| 3359 | 3541 | out: |
|---|
| 3360 | | - return -ENOBUFS; |
|---|
| 3542 | + return ret; |
|---|
| 3361 | 3543 | } |
|---|
| 3362 | 3544 | EXPORT_SYMBOL(proto_register); |
|---|
| 3363 | 3545 | |
|---|
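Since proto_register() now reports index exhaustion as -ENOSPC instead of ignoring it, callers should propagate its return value. A hedged sketch of a module init path; my_proto and my_family_ops are illustrative placeholders:

```c
static int __init my_proto_init(void)
{
	int err;

	err = proto_register(&my_proto, 1 /* alloc_slab */);
	if (err)
		return err;	/* -ENOBUFS (slab) or -ENOSPC (inuse index) */

	err = sock_register(&my_family_ops);
	if (err)
		proto_unregister(&my_proto);
	return err;
}
```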
| .. | .. |
|---|
| 3372 | 3554 | prot->slab = NULL; |
|---|
| 3373 | 3555 | |
|---|
| 3374 | 3556 | req_prot_cleanup(prot->rsk_prot); |
|---|
| 3375 | | - |
|---|
| 3376 | | - if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) { |
|---|
| 3377 | | - kmem_cache_destroy(prot->twsk_prot->twsk_slab); |
|---|
| 3378 | | - kfree(prot->twsk_prot->twsk_slab_name); |
|---|
| 3379 | | - prot->twsk_prot->twsk_slab = NULL; |
|---|
| 3380 | | - } |
|---|
| 3557 | + tw_prot_cleanup(prot->twsk_prot); |
|---|
| 3381 | 3558 | } |
|---|
| 3382 | 3559 | EXPORT_SYMBOL(proto_unregister); |
|---|
| 3383 | 3560 | |
|---|
| .. | .. |
|---|
| 3394 | 3571 | #ifdef CONFIG_INET |
|---|
| 3395 | 3572 | if (family == AF_INET && |
|---|
| 3396 | 3573 | protocol != IPPROTO_RAW && |
|---|
| 3574 | + protocol < MAX_INET_PROTOS && |
|---|
| 3397 | 3575 | !rcu_access_pointer(inet_protos[protocol])) |
|---|
| 3398 | 3576 | return -ENOENT; |
|---|
| 3399 | 3577 | #endif |
|---|
| .. | .. |
|---|
| 3431 | 3609 | return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L; |
|---|
| 3432 | 3610 | } |
|---|
| 3433 | 3611 | |
|---|
| 3434 | | -static char *sock_prot_memory_pressure(struct proto *proto) |
|---|
| 3612 | +static const char *sock_prot_memory_pressure(struct proto *proto) |
|---|
| 3435 | 3613 | { |
|---|
| 3436 | 3614 | return proto->memory_pressure != NULL ? |
|---|
| 3437 | 3615 | proto_memory_pressure(proto) ? "yes" : "no" : "NI"; |
|---|
| .. | .. |
|---|
| 3535 | 3713 | } |
|---|
| 3536 | 3714 | EXPORT_SYMBOL(sk_busy_loop_end); |
|---|
| 3537 | 3715 | #endif /* CONFIG_NET_RX_BUSY_POLL */ |
|---|
| 3716 | + |
|---|
| 3717 | +int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len) |
|---|
| 3718 | +{ |
|---|
| 3719 | + if (!sk->sk_prot->bind_add) |
|---|
| 3720 | + return -EOPNOTSUPP; |
|---|
| 3721 | + return sk->sk_prot->bind_add(sk, addr, addr_len); |
|---|
| 3722 | +} |
|---|
| 3723 | +EXPORT_SYMBOL(sock_bind_add); |
|---|
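sock_bind_add() gives in-kernel callers a uniform way to add a secondary address to a multi-homed socket when the protocol supplies a bind_add hook (SCTP implements one). A hedged sketch; my_add_loopback() and the address choice are illustrative:

```c
static int my_add_loopback(struct sock *sk)
{
	struct sockaddr_in sin = {
		.sin_family	 = AF_INET,
		.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
	};

	/* returns -EOPNOTSUPP when the protocol has no bind_add hook */
	return sock_bind_add(sk, (struct sockaddr *)&sin, sizeof(sin));
}
```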