+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
...
  *
  * Generic socket support routines. Memory allocators, socket lock/release
  * handler for protocols to use and generic option handler.
- *
  *
  * Authors:	Ross Biro
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
...
  *		Arnaldo C. Melo :	cleanups, use skb_queue_purge
  *
  * To Fix:
- *
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
  */

 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
...
 #include <linux/static_key.h>
 #include <linux/memcontrol.h>
 #include <linux/prefetch.h>
+#include <linux/compat.h>

 #include <linux/uaccess.h>

...

 #include <linux/filter.h>
 #include <net/sock_reuseport.h>
+#include <net/bpf_sk_storage.h>

 #include <trace/events/sock.h>
+#include <trace/hooks/sched.h>

 #include <net/tcp.h>
 #include <net/busy_poll.h>
...
 }
 EXPORT_SYMBOL(__sk_backlog_rcv);

-static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
+static int sock_get_timeout(long timeo, void *optval, bool old_timeval)
 {
-	struct timeval tv;
+	struct __kernel_sock_timeval tv;

-	if (optlen < sizeof(tv))
-		return -EINVAL;
-	if (copy_from_user(&tv, optval, sizeof(tv)))
-		return -EFAULT;
+	if (timeo == MAX_SCHEDULE_TIMEOUT) {
+		tv.tv_sec = 0;
+		tv.tv_usec = 0;
+	} else {
+		tv.tv_sec = timeo / HZ;
+		tv.tv_usec = ((timeo % HZ) * USEC_PER_SEC) / HZ;
+	}
+
+	if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
+		struct old_timeval32 tv32 = { tv.tv_sec, tv.tv_usec };
+		*(struct old_timeval32 *)optval = tv32;
+		return sizeof(tv32);
+	}
+
+	if (old_timeval) {
+		struct __kernel_old_timeval old_tv;
+		old_tv.tv_sec = tv.tv_sec;
+		old_tv.tv_usec = tv.tv_usec;
+		*(struct __kernel_old_timeval *)optval = old_tv;
+		return sizeof(old_tv);
+	}
+
+	*(struct __kernel_sock_timeval *)optval = tv;
+	return sizeof(tv);
+}
+
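The rewritten sock_set_timeout() below takes a sockptr_t rather than a char __user pointer, so one code path can serve both user-space setsockopt() callers and in-kernel callers. A minimal sketch of the idea, assuming the include/linux/sockptr.h API (USER_SOCKPTR(), KERNEL_SOCKPTR(), copy_from_sockptr()); the helper name read_opt_int is hypothetical:

#include <linux/sockptr.h>

/* copy_from_sockptr() dispatches to copy_from_user() or a plain memcpy()
 * depending on how the sockptr_t was constructed.
 */
static int read_opt_int(sockptr_t optval, unsigned int optlen, int *val)
{
	if (optlen < sizeof(*val))
		return -EINVAL;		/* option buffer too short */
	if (copy_from_sockptr(val, optval, sizeof(*val)))
		return -EFAULT;		/* faulted on a user pointer */
	return 0;
}

A user-space buffer would be passed as USER_SOCKPTR(buf), an in-kernel value as KERNEL_SOCKPTR(&kval).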
---|
+static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
+			    bool old_timeval)
+{
+	struct __kernel_sock_timeval tv;
+
+	if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
+		struct old_timeval32 tv32;
+
+		if (optlen < sizeof(tv32))
+			return -EINVAL;
+
+		if (copy_from_sockptr(&tv32, optval, sizeof(tv32)))
+			return -EFAULT;
+		tv.tv_sec = tv32.tv_sec;
+		tv.tv_usec = tv32.tv_usec;
+	} else if (old_timeval) {
+		struct __kernel_old_timeval old_tv;
+
+		if (optlen < sizeof(old_tv))
+			return -EINVAL;
+		if (copy_from_sockptr(&old_tv, optval, sizeof(old_tv)))
+			return -EFAULT;
+		tv.tv_sec = old_tv.tv_sec;
+		tv.tv_usec = old_tv.tv_usec;
+	} else {
+		if (optlen < sizeof(tv))
+			return -EINVAL;
+		if (copy_from_sockptr(&tv, optval, sizeof(tv)))
+			return -EFAULT;
+	}
 	if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
 		return -EDOM;

...
 	*timeo_p = MAX_SCHEDULE_TIMEOUT;
 	if (tv.tv_sec == 0 && tv.tv_usec == 0)
 		return 0;
-	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
-		*timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP(tv.tv_usec, USEC_PER_SEC / HZ);
+	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1))
+		*timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP((unsigned long)tv.tv_usec, USEC_PER_SEC / HZ);
 	return 0;
-}
-
-static void sock_warn_obsolete_bsdism(const char *name)
-{
-	static int warned;
-	static char warncomm[TASK_COMM_LEN];
-	if (strcmp(warncomm, current->comm) && warned < 5) {
-		strcpy(warncomm, current->comm);
-		pr_warn("process `%s' is using obsolete %s SO_BSDCOMPAT\n",
-			warncomm, name);
-		warned++;
-	}
 }

 static bool sock_needs_netstamp(const struct sock *sk)
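The conversion above turns a seconds/microseconds pair into jiffies, rounding sub-tick remainders up so a small nonzero timeout never collapses to zero. A standalone illustration of the arithmetic (ordinary user-space C, assuming HZ = 250, i.e. one tick = 4000 microseconds):

#include <stdio.h>

#define HZ 250
#define USEC_PER_SEC 1000000L
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	long tv_sec = 2, tv_usec = 1500;	/* a 2.0015 s timeout */
	long timeo = tv_sec * HZ + DIV_ROUND_UP(tv_usec, USEC_PER_SEC / HZ);

	/* 2 * 250 + ceil(1500 / 4000) = 500 + 1 = 501 jiffies */
	printf("%ld jiffies\n", timeo);
	return 0;
}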
---|
...

 	rc = sk_backlog_rcv(sk, skb);

-		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
-	} else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
+		mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
+	} else if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) {
 		bh_unlock_sock(sk);
 		atomic_inc(&sk->sk_drops);
 		goto discard_and_relse;
...
 }
 EXPORT_SYMBOL(sk_dst_check);

-static int sock_setbindtodevice(struct sock *sk, char __user *optval,
-				int optlen)
+static int sock_bindtoindex_locked(struct sock *sk, int ifindex)
+{
+	int ret = -ENOPROTOOPT;
+#ifdef CONFIG_NETDEVICES
+	struct net *net = sock_net(sk);
+
+	/* Sorry... */
+	ret = -EPERM;
+	if (sk->sk_bound_dev_if && !ns_capable(net->user_ns, CAP_NET_RAW))
+		goto out;
+
+	ret = -EINVAL;
+	if (ifindex < 0)
+		goto out;
+
+	sk->sk_bound_dev_if = ifindex;
+	if (sk->sk_prot->rehash)
+		sk->sk_prot->rehash(sk);
+	sk_dst_reset(sk);
+
+	ret = 0;
+
+out:
+#endif
+
+	return ret;
+}
+
+int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk)
+{
+	int ret;
+
+	if (lock_sk)
+		lock_sock(sk);
+	ret = sock_bindtoindex_locked(sk, ifindex);
+	if (lock_sk)
+		release_sock(sk);
+
+	return ret;
+}
+EXPORT_SYMBOL(sock_bindtoindex);
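sock_bindtoindex() gives in-kernel users a locked or unlocked way to pin a socket to a device. A hypothetical caller (not part of this patch; the name lookup is purely illustrative) might resolve an interface name to an ifindex first:

#include <linux/netdevice.h>
#include <net/sock.h>

static int bind_sk_to_dev(struct sock *sk, struct net *net, const char *name)
{
	struct net_device *dev = dev_get_by_name(net, name);
	int ret;

	if (!dev)
		return -ENODEV;
	ret = sock_bindtoindex(sk, dev->ifindex, true);	/* takes the sock lock */
	dev_put(dev);
	return ret;
}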
---|
+
+static int sock_setbindtodevice(struct sock *sk, sockptr_t optval, int optlen)
 {
 	int ret = -ENOPROTOOPT;
 #ifdef CONFIG_NETDEVICES
 	struct net *net = sock_net(sk);
 	char devname[IFNAMSIZ];
 	int index;
-
-	/* Sorry... */
-	ret = -EPERM;
-	if (!ns_capable(net->user_ns, CAP_NET_RAW))
-		goto out;

 	ret = -EINVAL;
 	if (optlen < 0)
...
 	memset(devname, 0, sizeof(devname));

 	ret = -EFAULT;
-	if (copy_from_user(devname, optval, optlen))
+	if (copy_from_sockptr(devname, optval, optlen))
 		goto out;

 	index = 0;
...
 		goto out;
 	}

-	lock_sock(sk);
-	sk->sk_bound_dev_if = index;
-	sk_dst_reset(sk);
-	release_sock(sk);
-
-	ret = 0;
-
+	return sock_bindtoindex(sk, index, true);
 out:
 #endif

...
 	return ret;
 }

-static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
-{
-	if (valbool)
-		sock_set_flag(sk, bit);
-	else
-		sock_reset_flag(sk, bit);
-}
-
 bool sk_mc_loop(struct sock *sk)
 {
 	if (dev_recursion_level())
...
 }
 EXPORT_SYMBOL(sk_mc_loop);

+void sock_set_reuseaddr(struct sock *sk)
+{
+	lock_sock(sk);
+	sk->sk_reuse = SK_CAN_REUSE;
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_reuseaddr);
+
+void sock_set_reuseport(struct sock *sk)
+{
+	lock_sock(sk);
+	sk->sk_reuseport = true;
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_reuseport);
+
+void sock_no_linger(struct sock *sk)
+{
+	lock_sock(sk);
+	sk->sk_lingertime = 0;
+	sock_set_flag(sk, SOCK_LINGER);
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_no_linger);
+
+void sock_set_priority(struct sock *sk, u32 priority)
+{
+	lock_sock(sk);
+	sk->sk_priority = priority;
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_priority);
+
+void sock_set_sndtimeo(struct sock *sk, s64 secs)
+{
+	lock_sock(sk);
+	if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1)
+		sk->sk_sndtimeo = secs * HZ;
+	else
+		sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_sndtimeo);
+
+static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns)
+{
+	if (val) {
+		sock_valbool_flag(sk, SOCK_TSTAMP_NEW, new);
+		sock_valbool_flag(sk, SOCK_RCVTSTAMPNS, ns);
+		sock_set_flag(sk, SOCK_RCVTSTAMP);
+		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
+	} else {
+		sock_reset_flag(sk, SOCK_RCVTSTAMP);
+		sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
+	}
+}
+
+void sock_enable_timestamps(struct sock *sk)
+{
+	lock_sock(sk);
+	__sock_set_timestamps(sk, true, false, true);
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_enable_timestamps);
+
+void sock_set_keepalive(struct sock *sk)
+{
+	lock_sock(sk);
+	if (sk->sk_prot->keepalive)
+		sk->sk_prot->keepalive(sk, true);
+	sock_valbool_flag(sk, SOCK_KEEPOPEN, true);
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_keepalive);
+
+static void __sock_set_rcvbuf(struct sock *sk, int val)
+{
+	/* Ensure val * 2 fits into an int, to prevent max_t() from treating it
+	 * as a negative value.
+	 */
+	val = min_t(int, val, INT_MAX / 2);
+	sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+
+	/* We double it on the way in to account for "struct sk_buff" etc.
+	 * overhead. Applications assume that the SO_RCVBUF setting they make
+	 * will allow that much actual data to be received on that socket.
+	 *
+	 * Applications are unaware that "struct sk_buff" and other overheads
+	 * allocate from the receive buffer during socket buffer allocation.
+	 *
+	 * And after considering the possible alternatives, returning the value
+	 * we actually used in getsockopt is the most desirable behavior.
+	 */
+	WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * 2, SOCK_MIN_RCVBUF));
+}
+
+void sock_set_rcvbuf(struct sock *sk, int val)
+{
+	lock_sock(sk);
+	__sock_set_rcvbuf(sk, val);
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_rcvbuf);
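The doubling in __sock_set_rcvbuf() is observable from user space: getsockopt() reports the value the kernel actually uses, roughly twice the requested one. A small demonstration:

#include <stdio.h>
#include <sys/socket.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	int req = 65536, got;
	socklen_t len = sizeof(got);

	setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &req, sizeof(req));
	getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &got, &len);
	printf("requested %d, kernel reports %d\n", req, got);	/* ~131072 */
	return 0;
}

(The result is also clamped by the net.core.rmem_max sysctl and SOCK_MIN_RCVBUF.)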
---|
+
+static void __sock_set_mark(struct sock *sk, u32 val)
+{
+	if (val != sk->sk_mark) {
+		sk->sk_mark = val;
+		sk_dst_reset(sk);
+	}
+}
+
+void sock_set_mark(struct sock *sk, u32 val)
+{
+	lock_sock(sk);
+	__sock_set_mark(sk, val);
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_mark);
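These exported setters let kernel-internal users configure common socket options directly instead of faking a setsockopt() call with kernel pointers. A hypothetical in-kernel listener setup (illustrative only, under the assumption the declarations live in net/sock.h):

#include <net/sock.h>

static void tune_listener(struct socket *sock)
{
	sock_set_reuseaddr(sock->sk);		/* SK_CAN_REUSE for fast rebind */
	sock_set_keepalive(sock->sk);		/* protocol keepalive on */
	sock_set_rcvbuf(sock->sk, 1 << 20);	/* request ~1 MiB; doubled inside */
	sock_set_priority(sock->sk, 6);		/* e.g. TC_PRIO_INTERACTIVE */
}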
---|
+
 /*
  * This is meant for all protocols to use and covers goings on
  * at the socket level. Everything here is generic.
  */

 int sock_setsockopt(struct socket *sock, int level, int optname,
-		    char __user *optval, unsigned int optlen)
+		    sockptr_t optval, unsigned int optlen)
 {
 	struct sock_txtime sk_txtime;
 	struct sock *sk = sock->sk;
...
 	if (optlen < sizeof(int))
 		return -EINVAL;

-	if (get_user(val, (int __user *)optval))
+	if (copy_from_sockptr(&val, optval, sizeof(val)))
 		return -EFAULT;

 	valbool = val ? 1 : 0;
...
 		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
 		 * are treated in BSD as hints
 		 */
-		val = min_t(u32, val, sysctl_wmem_max);
+		val = min_t(u32, val, READ_ONCE(sysctl_wmem_max));
set_sndbuf:
+		/* Ensure val * 2 fits into an int, to prevent max_t()
+		 * from treating it as a negative value.
+		 */
+		val = min_t(int, val, INT_MAX / 2);
 		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
-		sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
+		WRITE_ONCE(sk->sk_sndbuf,
+			   max_t(int, val * 2, SOCK_MIN_SNDBUF));
 		/* Wake up sending tasks if we upped the value. */
 		sk->sk_write_space(sk);
 		break;
...
 			ret = -EPERM;
 			break;
 		}
+
+		/* No negative values (to prevent underflow, as val will be
+		 * multiplied by 2).
+		 */
+		if (val < 0)
+			val = 0;
 		goto set_sndbuf;

 	case SO_RCVBUF:
...
 		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
 		 * are treated in BSD as hints
 		 */
-		val = min_t(u32, val, sysctl_rmem_max);
-set_rcvbuf:
-		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
-		/*
-		 * We double it on the way in to account for
-		 * "struct sk_buff" etc. overhead. Applications
-		 * assume that the SO_RCVBUF setting they make will
-		 * allow that much actual data to be received on that
-		 * socket.
-		 *
-		 * Applications are unaware that "struct sk_buff" and
-		 * other overheads allocate from the receive buffer
-		 * during socket buffer allocation.
-		 *
-		 * And after considering the possible alternatives,
-		 * returning the value we actually used in getsockopt
-		 * is the most desirable behavior.
-		 */
-		sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
+		__sock_set_rcvbuf(sk, min_t(u32, val, READ_ONCE(sysctl_rmem_max)));
 		break;

 	case SO_RCVBUFFORCE:
...
 			ret = -EPERM;
 			break;
 		}
-		goto set_rcvbuf;
+
+		/* No negative values (to prevent underflow, as val will be
+		 * multiplied by 2).
+		 */
+		__sock_set_rcvbuf(sk, max(val, 0));
+		break;

 	case SO_KEEPALIVE:
 		if (sk->sk_prot->keepalive)
...
 			ret = -EINVAL;	/* 1003.1g */
 			break;
 		}
-		if (copy_from_user(&ling, optval, sizeof(ling))) {
+		if (copy_from_sockptr(&ling, optval, sizeof(ling))) {
 			ret = -EFAULT;
 			break;
 		}
---|
...
 		break;

 	case SO_BSDCOMPAT:
-		sock_warn_obsolete_bsdism("setsockopt");
 		break;

 	case SO_PASSCRED:
...
 			clear_bit(SOCK_PASSCRED, &sock->flags);
 		break;

-	case SO_TIMESTAMP:
-	case SO_TIMESTAMPNS:
-		if (valbool)  {
-			if (optname == SO_TIMESTAMP)
-				sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
-			else
-				sock_set_flag(sk, SOCK_RCVTSTAMPNS);
-			sock_set_flag(sk, SOCK_RCVTSTAMP);
-			sock_enable_timestamp(sk, SOCK_TIMESTAMP);
-		} else {
-			sock_reset_flag(sk, SOCK_RCVTSTAMP);
-			sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
-		}
+	case SO_TIMESTAMP_OLD:
+		__sock_set_timestamps(sk, valbool, false, false);
 		break;
-
-	case SO_TIMESTAMPING:
+	case SO_TIMESTAMP_NEW:
+		__sock_set_timestamps(sk, valbool, true, false);
+		break;
+	case SO_TIMESTAMPNS_OLD:
+		__sock_set_timestamps(sk, valbool, false, true);
+		break;
+	case SO_TIMESTAMPNS_NEW:
+		__sock_set_timestamps(sk, valbool, true, true);
+		break;
+	case SO_TIMESTAMPING_NEW:
+	case SO_TIMESTAMPING_OLD:
 		if (val & ~SOF_TIMESTAMPING_MASK) {
 			ret = -EINVAL;
 			break;
...
 		}

 		sk->sk_tsflags = val;
+		sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
+
 		if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
 			sock_enable_timestamp(sk,
 					      SOCK_TIMESTAMPING_RX_SOFTWARE);
...
 		if (sock->ops->set_rcvlowat)
 			ret = sock->ops->set_rcvlowat(sk, val);
 		else
-			sk->sk_rcvlowat = val ? : 1;
+			WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
 		break;

-	case SO_RCVTIMEO:
-		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
+	case SO_RCVTIMEO_OLD:
+	case SO_RCVTIMEO_NEW:
+		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval,
+				       optlen, optname == SO_RCVTIMEO_OLD);
 		break;

-	case SO_SNDTIMEO:
-		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
+	case SO_SNDTIMEO_OLD:
+	case SO_SNDTIMEO_NEW:
+		ret = sock_set_timeout(&sk->sk_sndtimeo, optval,
+				       optlen, optname == SO_SNDTIMEO_OLD);
 		break;

-	case SO_ATTACH_FILTER:
-		ret = -EINVAL;
-		if (optlen == sizeof(struct sock_fprog)) {
-			struct sock_fprog fprog;
+	case SO_ATTACH_FILTER: {
+		struct sock_fprog fprog;

-			ret = -EFAULT;
-			if (copy_from_user(&fprog, optval, sizeof(fprog)))
-				break;
-
+		ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
+		if (!ret)
 			ret = sk_attach_filter(&fprog, sk);
-		}
 		break;
-
+	}
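The brace-scoped SO_ATTACH_FILTER case feeds a classic-BPF program through copy_bpf_fprog_from_user() into sk_attach_filter(). From user space the option looks the same as it always has; a minimal (deliberately trivial) example attaching a drop-everything filter:

#include <linux/filter.h>
#include <stdio.h>
#include <sys/socket.h>

int main(void)
{
	/* Single instruction: return 0, i.e. drop every packet. */
	struct sock_filter code[] = {
		BPF_STMT(BPF_RET | BPF_K, 0),
	};
	struct sock_fprog bpf = {
		.len = sizeof(code) / sizeof(code[0]),
		.filter = code,
	};
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf)))
		perror("SO_ATTACH_FILTER");
	return 0;	/* recv() on fd now never returns data */
}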
---|
 	case SO_ATTACH_BPF:
 		ret = -EINVAL;
 		if (optlen == sizeof(u32)) {
 			u32 ufd;

 			ret = -EFAULT;
-			if (copy_from_user(&ufd, optval, sizeof(ufd)))
+			if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
 				break;

 			ret = sk_attach_bpf(ufd, sk);
 		}
 		break;

-	case SO_ATTACH_REUSEPORT_CBPF:
-		ret = -EINVAL;
-		if (optlen == sizeof(struct sock_fprog)) {
-			struct sock_fprog fprog;
+	case SO_ATTACH_REUSEPORT_CBPF: {
+		struct sock_fprog fprog;

-			ret = -EFAULT;
-			if (copy_from_user(&fprog, optval, sizeof(fprog)))
-				break;
-
+		ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
+		if (!ret)
 			ret = sk_reuseport_attach_filter(&fprog, sk);
-		}
 		break;
-
+	}
 	case SO_ATTACH_REUSEPORT_EBPF:
 		ret = -EINVAL;
 		if (optlen == sizeof(u32)) {
 			u32 ufd;

 			ret = -EFAULT;
-			if (copy_from_user(&ufd, optval, sizeof(ufd)))
+			if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
 				break;

 			ret = sk_reuseport_attach_bpf(ufd, sk);
 		}
+		break;
+
+	case SO_DETACH_REUSEPORT_BPF:
+		ret = reuseport_detach_prog(sk);
 		break;

 	case SO_DETACH_FILTER:
...
 		clear_bit(SOCK_PASSSEC, &sock->flags);
 		break;
 	case SO_MARK:
-		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
 			ret = -EPERM;
-		else
-			sk->sk_mark = val;
+			break;
+		}
+
+		__sock_set_mark(sk, val);
 		break;

 	case SO_RXQ_OVFL:
...
 #endif

 	case SO_MAX_PACING_RATE:
-		if (val != ~0U)
+	{
+		unsigned long ulval = (val == ~0U) ? ~0UL : (unsigned int)val;
+
+		if (sizeof(ulval) != sizeof(val) &&
+		    optlen >= sizeof(ulval) &&
+		    copy_from_sockptr(&ulval, optval, sizeof(ulval))) {
+			ret = -EFAULT;
+			break;
+		}
+		if (ulval != ~0UL)
 			cmpxchg(&sk->sk_pacing_status,
 				SK_PACING_NONE,
 				SK_PACING_NEEDED);
-		sk->sk_max_pacing_rate = val;
-		sk->sk_pacing_rate = min(sk->sk_pacing_rate,
-					 sk->sk_max_pacing_rate);
+		sk->sk_max_pacing_rate = ulval;
+		sk->sk_pacing_rate = min(sk->sk_pacing_rate, ulval);
 		break;
-
+	}
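On 64-bit kernels the block above reads a full 8-byte value when the caller supplies one, so pacing rates beyond the old 32-bit ceiling (~0U bytes/s, about 34 Gbit/s) become expressible. A user-space sketch, assuming the libc headers define SO_MAX_PACING_RATE:

#include <stdint.h>
#include <sys/socket.h>

void set_pacing(int fd)
{
	uint64_t rate = 6250000000ULL;	/* 50 Gbit/s, in bytes per second */

	setsockopt(fd, SOL_SOCKET, SO_MAX_PACING_RATE, &rate, sizeof(rate));
}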
---|
 	case SO_INCOMING_CPU:
 		WRITE_ONCE(sk->sk_incoming_cpu, val);
 		break;
...

 	case SO_ZEROCOPY:
 		if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
-			if (sk->sk_protocol != IPPROTO_TCP)
+			if (!((sk->sk_type == SOCK_STREAM &&
+			       sk->sk_protocol == IPPROTO_TCP) ||
+			      (sk->sk_type == SOCK_DGRAM &&
+			       sk->sk_protocol == IPPROTO_UDP)))
 				ret = -ENOTSUPP;
 		} else if (sk->sk_family != PF_RDS) {
 			ret = -ENOTSUPP;
...
 		break;

 	case SO_TXTIME:
-		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
-			ret = -EPERM;
-		} else if (optlen != sizeof(struct sock_txtime)) {
+		if (optlen != sizeof(struct sock_txtime)) {
 			ret = -EINVAL;
-		} else if (copy_from_user(&sk_txtime, optval,
+			break;
+		} else if (copy_from_sockptr(&sk_txtime, optval,
 					  sizeof(struct sock_txtime))) {
 			ret = -EFAULT;
+			break;
 		} else if (sk_txtime.flags & ~SOF_TXTIME_FLAGS_MASK) {
 			ret = -EINVAL;
-		} else {
-			sock_valbool_flag(sk, SOCK_TXTIME, true);
-			sk->sk_clockid = sk_txtime.clockid;
-			sk->sk_txtime_deadline_mode =
-				!!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE);
-			sk->sk_txtime_report_errors =
-				!!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS);
+			break;
 		}
+		/* CLOCK_MONOTONIC is only used by sch_fq, and this packet
+		 * scheduler has enough safe guards.
+		 */
+		if (sk_txtime.clockid != CLOCK_MONOTONIC &&
+		    !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
+			ret = -EPERM;
+			break;
+		}
+		sock_valbool_flag(sk, SOCK_TXTIME, true);
+		sk->sk_clockid = sk_txtime.clockid;
+		sk->sk_txtime_deadline_mode =
+			!!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE);
+		sk->sk_txtime_report_errors =
+			!!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS);
+		break;
+
+	case SO_BINDTOIFINDEX:
+		ret = sock_bindtoindex_locked(sk, val);
 		break;
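SO_BINDTOIFINDEX is the numeric sibling of SO_BINDTODEVICE: it takes an interface index instead of a name and lands directly in sock_bindtoindex_locked(). A user-space example, assuming headers that define SO_BINDTOIFINDEX:

#include <net/if.h>
#include <sys/socket.h>

int bind_to_ifindex(int fd, const char *ifname)
{
	int idx = if_nametoindex(ifname);	/* 0 means "no such interface" */

	if (!idx)
		return -1;
	return setsockopt(fd, SOL_SOCKET, SO_BINDTOIFINDEX, &idx, sizeof(idx));
}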
---|

 	default:
...
 	union {
 		int val;
 		u64 val64;
+		unsigned long ulval;
 		struct linger ling;
-		struct timeval tm;
+		struct old_timeval32 tm32;
+		struct __kernel_old_timeval tm;
+		struct __kernel_sock_timeval stm;
 		struct sock_txtime txtime;
 	} v;

...
 		break;

 	case SO_BSDCOMPAT:
-		sock_warn_obsolete_bsdism("getsockopt");
 		break;

-	case SO_TIMESTAMP:
+	case SO_TIMESTAMP_OLD:
 		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
+			!sock_flag(sk, SOCK_TSTAMP_NEW) &&
 			!sock_flag(sk, SOCK_RCVTSTAMPNS);
 		break;

-	case SO_TIMESTAMPNS:
-		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
+	case SO_TIMESTAMPNS_OLD:
+		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && !sock_flag(sk, SOCK_TSTAMP_NEW);
 		break;

-	case SO_TIMESTAMPING:
+	case SO_TIMESTAMP_NEW:
+		v.val = sock_flag(sk, SOCK_RCVTSTAMP) && sock_flag(sk, SOCK_TSTAMP_NEW);
+		break;
+
+	case SO_TIMESTAMPNS_NEW:
+		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && sock_flag(sk, SOCK_TSTAMP_NEW);
+		break;
+
+	case SO_TIMESTAMPING_OLD:
 		v.val = sk->sk_tsflags;
 		break;

-	case SO_RCVTIMEO:
-		lv = sizeof(struct timeval);
-		if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
-			v.tm.tv_sec = 0;
-			v.tm.tv_usec = 0;
-		} else {
-			v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
-			v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * USEC_PER_SEC) / HZ;
-		}
+	case SO_RCVTIMEO_OLD:
+	case SO_RCVTIMEO_NEW:
+		lv = sock_get_timeout(sk->sk_rcvtimeo, &v, SO_RCVTIMEO_OLD == optname);
 		break;

-	case SO_SNDTIMEO:
-		lv = sizeof(struct timeval);
-		if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
-			v.tm.tv_sec = 0;
-			v.tm.tv_usec = 0;
-		} else {
-			v.tm.tv_sec = sk->sk_sndtimeo / HZ;
-			v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * USEC_PER_SEC) / HZ;
-		}
+	case SO_SNDTIMEO_OLD:
+	case SO_SNDTIMEO_NEW:
+		lv = sock_get_timeout(sk->sk_sndtimeo, &v, SO_SNDTIMEO_OLD == optname);
 		break;
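On the query side, sock_get_timeout() fills whichever layout matches the option variant: old_timeval32 for 32-bit compat callers, __kernel_old_timeval for the _OLD options, or the 64-bit __kernel_sock_timeval for the _NEW ones. The classic, portable form from user space still reads a struct timeval:

#include <stdio.h>
#include <sys/socket.h>
#include <sys/time.h>

void show_recv_timeout(int fd)
{
	struct timeval tv;
	socklen_t len = sizeof(tv);

	if (getsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, &len) == 0)
		printf("rcv timeout: %ld.%06ld s\n",
		       (long)tv.tv_sec, (long)tv.tv_usec);
}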
---|

 	case SO_RCVLOWAT:
...
 #endif

 	case SO_MAX_PACING_RATE:
-		v.val = sk->sk_max_pacing_rate;
+		if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) {
+			lv = sizeof(v.ulval);
+			v.ulval = sk->sk_max_pacing_rate;
+		} else {
+			/* 32bit version */
+			v.val = min_t(unsigned long, sk->sk_max_pacing_rate, ~0U);
+		}
 		break;

 	case SO_INCOMING_CPU:
...
 			SOF_TXTIME_REPORT_ERRORS : 0;
 		break;

+	case SO_BINDTOIFINDEX:
+		v.val = sk->sk_bound_dev_if;
+		break;
+
 	default:
 		/* We implement the SO_SNDLOWAT etc to not be settable
 		 * (1003.1g 7).
...
  */
 static void sock_copy(struct sock *nsk, const struct sock *osk)
 {
+	const struct proto *prot = READ_ONCE(osk->sk_prot);
 #ifdef CONFIG_SECURITY_NETWORK
 	void *sptr = nsk->sk_security;
 #endif
 	memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));

 	memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
-	       osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
+	       prot->obj_size - offsetof(struct sock, sk_dontcopy_end));

 #ifdef CONFIG_SECURITY_NETWORK
 	nsk->sk_security = sptr;
...

 	sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);

+#ifdef CONFIG_BPF_SYSCALL
+	bpf_sk_storage_free(sk);
+#endif
+
 	if (atomic_read(&sk->sk_omem_alloc))
 		pr_debug("%s: optmem leakage (%d bytes) detected\n",
 			 __func__, atomic_read(&sk->sk_omem_alloc));
...
  */
 struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 {
-	struct sock *newsk;
+	struct proto *prot = READ_ONCE(sk->sk_prot);
+	struct sk_filter *filter;
 	bool is_charged = true;
+	struct sock *newsk;

-	newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
-	if (newsk != NULL) {
-		struct sk_filter *filter;
+	newsk = sk_prot_alloc(prot, priority, sk->sk_family);
+	if (!newsk)
+		goto out;

-		sock_copy(newsk, sk);
+	sock_copy(newsk, sk);

-		newsk->sk_prot_creator = sk->sk_prot;
+	newsk->sk_prot_creator = prot;

-		/* SANITY */
-		if (likely(newsk->sk_net_refcnt))
-			get_net(sock_net(newsk));
-		sk_node_init(&newsk->sk_node);
-		sock_lock_init(newsk);
-		bh_lock_sock(newsk);
-		newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
-		newsk->sk_backlog.len = 0;
-
-		atomic_set(&newsk->sk_rmem_alloc, 0);
-		/*
-		 * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
-		 */
-		refcount_set(&newsk->sk_wmem_alloc, 1);
-		atomic_set(&newsk->sk_omem_alloc, 0);
-		sk_init_common(newsk);
-
-		newsk->sk_dst_cache = NULL;
-		newsk->sk_dst_pending_confirm = 0;
-		newsk->sk_wmem_queued = 0;
-		newsk->sk_forward_alloc = 0;
-		atomic_set(&newsk->sk_drops, 0);
-		newsk->sk_send_head = NULL;
-		newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
-		atomic_set(&newsk->sk_zckey, 0);
-
-		sock_reset_flag(newsk, SOCK_DONE);
-
-		/* sk->sk_memcg will be populated at accept() time */
-		newsk->sk_memcg = NULL;
-
-		cgroup_sk_clone(&newsk->sk_cgrp_data);
-
-		rcu_read_lock();
-		filter = rcu_dereference(sk->sk_filter);
-		if (filter != NULL)
-			/* though it's an empty new sock, the charging may fail
-			 * if sysctl_optmem_max was changed between creation of
-			 * original socket and cloning
-			 */
-			is_charged = sk_filter_charge(newsk, filter);
-		RCU_INIT_POINTER(newsk->sk_filter, filter);
-		rcu_read_unlock();
-
-		if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
-			/* We need to make sure that we don't uncharge the new
-			 * socket if we couldn't charge it in the first place
-			 * as otherwise we uncharge the parent's filter.
-			 */
-			if (!is_charged)
-				RCU_INIT_POINTER(newsk->sk_filter, NULL);
-			sk_free_unlock_clone(newsk);
-			newsk = NULL;
-			goto out;
-		}
-		RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
-
-		newsk->sk_err = 0;
-		newsk->sk_err_soft = 0;
-		newsk->sk_priority = 0;
-		newsk->sk_incoming_cpu = raw_smp_processor_id();
-		atomic64_set(&newsk->sk_cookie, 0);
-		if (likely(newsk->sk_net_refcnt))
-			sock_inuse_add(sock_net(newsk), 1);
-
-		/*
-		 * Before updating sk_refcnt, we must commit prior changes to memory
-		 * (Documentation/RCU/rculist_nulls.txt for details)
-		 */
-		smp_wmb();
-		refcount_set(&newsk->sk_refcnt, 2);
-
-		/*
-		 * Increment the counter in the same struct proto as the master
-		 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
-		 * is the same as sk->sk_prot->socks, as this field was copied
-		 * with memcpy).
-		 *
-		 * This _changes_ the previous behaviour, where
-		 * tcp_create_openreq_child always was incrementing the
-		 * equivalent to tcp_prot->socks (inet_sock_nr), so this have
-		 * to be taken into account in all callers. -acme
-		 */
-		sk_refcnt_debug_inc(newsk);
-		sk_set_socket(newsk, NULL);
-		sk_tx_queue_clear(newsk);
-		newsk->sk_wq = NULL;
-
-		if (newsk->sk_prot->sockets_allocated)
-			sk_sockets_allocated_inc(newsk);
-
-		if (sock_needs_netstamp(sk) &&
-		    newsk->sk_flags & SK_FLAGS_TIMESTAMP)
-			net_enable_timestamp();
+	/* SANITY */
+	if (likely(newsk->sk_net_refcnt)) {
+		get_net(sock_net(newsk));
+		sock_inuse_add(sock_net(newsk), 1);
 	}
+	sk_node_init(&newsk->sk_node);
+	sock_lock_init(newsk);
+	bh_lock_sock(newsk);
+	newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
+	newsk->sk_backlog.len = 0;
+
+	atomic_set(&newsk->sk_rmem_alloc, 0);
+
+	/* sk_wmem_alloc set to one (see sk_free() and sock_wfree()) */
+	refcount_set(&newsk->sk_wmem_alloc, 1);
+
+	atomic_set(&newsk->sk_omem_alloc, 0);
+	sk_init_common(newsk);
+
+	newsk->sk_dst_cache = NULL;
+	newsk->sk_dst_pending_confirm = 0;
+	newsk->sk_wmem_queued = 0;
+	newsk->sk_forward_alloc = 0;
+	atomic_set(&newsk->sk_drops, 0);
+	newsk->sk_send_head = NULL;
+	newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
+	atomic_set(&newsk->sk_zckey, 0);
+
+	sock_reset_flag(newsk, SOCK_DONE);
+
+	/* sk->sk_memcg will be populated at accept() time */
+	newsk->sk_memcg = NULL;
+
+	cgroup_sk_clone(&newsk->sk_cgrp_data);
+
+	rcu_read_lock();
+	filter = rcu_dereference(sk->sk_filter);
+	if (filter != NULL)
+		/* though it's an empty new sock, the charging may fail
+		 * if sysctl_optmem_max was changed between creation of
+		 * original socket and cloning
+		 */
+		is_charged = sk_filter_charge(newsk, filter);
+	RCU_INIT_POINTER(newsk->sk_filter, filter);
+	rcu_read_unlock();
+
+	if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
+		/* We need to make sure that we don't uncharge the new
+		 * socket if we couldn't charge it in the first place
+		 * as otherwise we uncharge the parent's filter.
+		 */
+		if (!is_charged)
+			RCU_INIT_POINTER(newsk->sk_filter, NULL);
+		sk_free_unlock_clone(newsk);
+		newsk = NULL;
+		goto out;
+	}
+	RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
+
+	if (bpf_sk_storage_clone(sk, newsk)) {
+		sk_free_unlock_clone(newsk);
+		newsk = NULL;
+		goto out;
+	}
+
+	/* Clear sk_user_data if parent had the pointer tagged
+	 * as not suitable for copying when cloning.
+	 */
+	if (sk_user_data_is_nocopy(newsk))
+		newsk->sk_user_data = NULL;
+
+	newsk->sk_err = 0;
+	newsk->sk_err_soft = 0;
+	newsk->sk_priority = 0;
+	newsk->sk_incoming_cpu = raw_smp_processor_id();
+
+	/* Before updating sk_refcnt, we must commit prior changes to memory
+	 * (Documentation/RCU/rculist_nulls.rst for details)
+	 */
+	smp_wmb();
+	refcount_set(&newsk->sk_refcnt, 2);
+
+	/* Increment the counter in the same struct proto as the master
+	 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
+	 * is the same as sk->sk_prot->socks, as this field was copied
+	 * with memcpy).
+	 *
+	 * This _changes_ the previous behaviour, where
+	 * tcp_create_openreq_child always was incrementing the
+	 * equivalent to tcp_prot->socks (inet_sock_nr), so this have
+	 * to be taken into account in all callers. -acme
+	 */
+	sk_refcnt_debug_inc(newsk);
+	sk_set_socket(newsk, NULL);
+	sk_tx_queue_clear(newsk);
+	RCU_INIT_POINTER(newsk->sk_wq, NULL);
+
+	if (newsk->sk_prot->sockets_allocated)
+		sk_sockets_allocated_inc(newsk);
+
+	if (sock_needs_netstamp(sk) && newsk->sk_flags & SK_FLAGS_TIMESTAMP)
+		net_enable_timestamp();
 out:
 	return newsk;
 }
---|
...
 }
 EXPORT_SYMBOL(skb_set_owner_w);

+static bool can_skb_orphan_partial(const struct sk_buff *skb)
+{
+#ifdef CONFIG_TLS_DEVICE
+	/* Drivers depend on in-order delivery for crypto offload,
+	 * partial orphan breaks out-of-order-OK logic.
+	 */
+	if (skb->decrypted)
+		return false;
+#endif
+	return (skb->destructor == sock_wfree ||
+		(IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree));
+}
+
 /* This helper is used by netem, as it can hold packets in its
  * delay queue. We want to allow the owner socket to send more
  * packets, as if they were already TX completed by a typical driver.
...
 	if (skb_is_tcp_pure_ack(skb))
 		return;

-	if (skb->destructor == sock_wfree
-#ifdef CONFIG_INET
-	    || skb->destructor == tcp_wfree
-#endif
-	    ) {
-		struct sock *sk = skb->sk;
+	if (can_skb_orphan_partial(skb) && skb_set_owner_sk_safe(skb, skb->sk))
+		return;

-		if (refcount_inc_not_zero(&sk->sk_refcnt)) {
-			WARN_ON(refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc));
-			skb->destructor = sock_efree;
-		}
-	} else {
-		skb_orphan(skb);
-	}
+	skb_orphan(skb);
 }
 EXPORT_SYMBOL(skb_orphan_partial);

...
 	sock_put(skb->sk);
 }
 EXPORT_SYMBOL(sock_efree);
+
+/* Buffer destructor for prefetch/receive path where reference count may
+ * not be held, e.g. for listen sockets.
+ */
+#ifdef CONFIG_INET
+void sock_pfree(struct sk_buff *skb)
+{
+	if (sk_is_refcounted(skb->sk))
+		sock_gen_put(skb->sk);
+}
+EXPORT_SYMBOL(sock_pfree);
+#endif /* CONFIG_INET */

 kuid_t sock_i_uid(struct sock *sk)
 {
...
 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
 			     gfp_t priority)
 {
-	if (force || refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
+	if (force ||
+	    refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf)) {
 		struct sk_buff *skb = alloc_skb(size, priority);
+
 		if (skb) {
 			skb_set_owner_w(skb, sk);
 			return skb;
...

 	/* small safe race: SKB_TRUESIZE may differ from final skb->truesize */
 	if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
-	    sysctl_optmem_max)
+	    READ_ONCE(sysctl_optmem_max))
 		return NULL;

 	skb = alloc_skb(size, priority);
...
  */
 void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
 {
-	if ((unsigned int)size <= sysctl_optmem_max &&
-	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
+	int optmem_max = READ_ONCE(sysctl_optmem_max);
+
+	if ((unsigned int)size <= optmem_max &&
+	    atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
 		void *mem;
 		/* First do the add, to avoid the race if kmalloc
 		 * might sleep.
...
 	if (WARN_ON_ONCE(!mem))
 		return;
 	if (nullify)
-		kzfree(mem);
+		kfree_sensitive(mem);
 	else
 		kfree(mem);
 	atomic_sub(size, &sk->sk_omem_alloc);
...
 			break;
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-		if (refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
+		if (refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf))
 			break;
 		if (sk->sk_shutdown & SEND_SHUTDOWN)
 			break;
...
 		if (sk->sk_shutdown & SEND_SHUTDOWN)
 			goto failure;

-		if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf)
+		if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf))
 			break;

 		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
...
 			return -EINVAL;
 		sockc->mark = *(u32 *)CMSG_DATA(cmsg);
 		break;
-	case SO_TIMESTAMPING:
+	case SO_TIMESTAMPING_OLD:
 		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
 			return -EINVAL;

...
 	}
 }

-/* On 32bit arches, an skb frag is limited to 2^15 */
 #define SKB_FRAG_PAGE_ORDER	get_order(32768)
+DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);

 /**
  * skb_page_frag_refill - check that a page_frag contains enough room
...
 	}

 	pfrag->offset = 0;
-	if (SKB_FRAG_PAGE_ORDER) {
+	if (SKB_FRAG_PAGE_ORDER &&
+	    !static_branch_unlikely(&net_high_order_alloc_disable_key)) {
 		/* Avoid direct reclaim but allow kswapd to wake */
 		pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
 					  __GFP_COMP | __GFP_NOWARN |
...
 	return false;
 }
 EXPORT_SYMBOL(sk_page_frag_refill);
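The new net_high_order_alloc_disable_key lets high-order page-frag allocation be switched off at runtime at essentially zero fast-path cost (in mainline kernels the key is wired to the net.core.high_order_alloc_disable sysctl; treat that mapping as an assumption here). A minimal sketch of the static-branch pattern itself (illustrative module code, not part of this patch):

#include <linux/jump_label.h>
#include <linux/printk.h>

DEFINE_STATIC_KEY_FALSE(my_feature_key);

static void fast_path(void)
{
	/* Compiles to a patched no-op until the key is enabled, so the
	 * common case pays nothing for the check.
	 */
	if (static_branch_unlikely(&my_feature_key))
		pr_info("optional slow behaviour\n");
}

static void control_path(bool on)
{
	if (on)
		static_branch_enable(&my_feature_key);
	else
		static_branch_disable(&my_feature_key);
}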
---|
-
-int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
-		int sg_start, int *sg_curr_index, unsigned int *sg_curr_size,
-		int first_coalesce)
-{
-	int sg_curr = *sg_curr_index, use = 0, rc = 0;
-	unsigned int size = *sg_curr_size;
-	struct page_frag *pfrag;
-	struct scatterlist *sge;
-
-	len -= size;
-	pfrag = sk_page_frag(sk);
-
-	while (len > 0) {
-		unsigned int orig_offset;
-
-		if (!sk_page_frag_refill(sk, pfrag)) {
-			rc = -ENOMEM;
-			goto out;
-		}
-
-		use = min_t(int, len, pfrag->size - pfrag->offset);
-
-		if (!sk_wmem_schedule(sk, use)) {
-			rc = -ENOMEM;
-			goto out;
-		}
-
-		sk_mem_charge(sk, use);
-		size += use;
-		orig_offset = pfrag->offset;
-		pfrag->offset += use;
-
-		sge = sg + sg_curr - 1;
-		if (sg_curr > first_coalesce && sg_page(sge) == pfrag->page &&
-		    sge->offset + sge->length == orig_offset) {
-			sge->length += use;
-		} else {
-			sge = sg + sg_curr;
-			sg_unmark_end(sge);
-			sg_set_page(sge, pfrag->page, use, orig_offset);
-			get_page(pfrag->page);
-			sg_curr++;
-
-			if (sg_curr == MAX_SKB_FRAGS)
-				sg_curr = 0;
-
-			if (sg_curr == sg_start) {
-				rc = -ENOSPC;
-				break;
-			}
-		}
-
-		len -= use;
-	}
-out:
-	*sg_curr_size = size;
-	*sg_curr_index = sg_curr;
-	return rc;
-}
-EXPORT_SYMBOL(sk_alloc_sg);

 static void __lock_sock(struct sock *sk)
 	__releases(&sk->sk_lock.slock)
...
 		next = skb->next;
 		prefetch(next);
 		WARN_ON_ONCE(skb_dst_is_noref(skb));
-		skb->next = NULL;
+		skb_mark_not_on_list(skb);
 		sk_backlog_rcv(sk, skb);

 		cond_resched();
...
 }
 EXPORT_SYMBOL(sock_no_shutdown);

-int sock_no_setsockopt(struct socket *sock, int level, int optname,
-		       char __user *optval, unsigned int optlen)
-{
-	return -EOPNOTSUPP;
-}
-EXPORT_SYMBOL(sock_no_setsockopt);
-
-int sock_no_getsockopt(struct socket *sock, int level, int optname,
-		       char __user *optval, int __user *optlen)
-{
-	return -EOPNOTSUPP;
-}
-EXPORT_SYMBOL(sock_no_getsockopt);
-
 int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
 {
 	return -EOPNOTSUPP;
...
 	rcu_read_unlock();
 }

-static void sock_def_readable(struct sock *sk)
+void sock_def_readable(struct sock *sk)
 {
 	struct socket_wq *wq;

 	rcu_read_lock();
 	wq = rcu_dereference(sk->sk_wq);
-	if (skwq_has_sleeper(wq))
+
+	if (skwq_has_sleeper(wq)) {
+		int done = 0;
+
+		trace_android_vh_do_wake_up_sync(&wq->wait, &done);
+		if (done)
+			goto out;
+
 		wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI |
 						EPOLLRDNORM | EPOLLRDBAND);
+	}
+
+out:
 	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
 	rcu_read_unlock();
 }
...
 	/* Do not wake up a writer until he can make "significant"
 	 * progress.  --DaveM
 	 */
-	if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
+	if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= READ_ONCE(sk->sk_sndbuf)) {
 		wq = rcu_dereference(sk->sk_wq);
 		if (skwq_has_sleeper(wq))
 			wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
...
 }
 EXPORT_SYMBOL(sk_stop_timer);

+void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer)
+{
+	if (del_timer_sync(timer))
+		__sock_put(sk);
+}
+EXPORT_SYMBOL(sk_stop_timer_sync);
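Unlike sk_stop_timer(), the _sync variant waits for a concurrently executing timer handler to finish before dropping the socket reference. An illustrative teardown path (not from this patch; the function and field names are hypothetical):

static void proto_destroy(struct sock *sk, struct timer_list *retransmit)
{
	sk_stop_timer_sync(sk, retransmit);	/* waits, then drops the sock ref */
	/* now safe to free per-connection state the handler touches */
}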
---|
| 2981 | + |
---|
2798 | 2982 | void sock_init_data(struct socket *sock, struct sock *sk) |
---|
2799 | 2983 | { |
---|
2800 | 2984 | sk_init_common(sk); |
---|
@@ -2803,8 +2987,8 @@
 	timer_setup(&sk->sk_timer, NULL, 0);
 
 	sk->sk_allocation	=	GFP_KERNEL;
-	sk->sk_rcvbuf		=	sysctl_rmem_default;
-	sk->sk_sndbuf		=	sysctl_wmem_default;
+	sk->sk_rcvbuf		=	READ_ONCE(sysctl_rmem_default);
+	sk->sk_sndbuf		=	READ_ONCE(sysctl_wmem_default);
 	sk->sk_state		=	TCP_CLOSE;
 	sk_set_socket(sk, sock);
 
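
sysctl_rmem_default and sysctl_wmem_default can be rewritten at any time via /proc/sys/net/core/, so these lockless loads are annotated with READ_ONCE() to mark the intentional data race (for KCSAN) and to keep the compiler from tearing or re-reading the value. The same pattern recurs throughout this diff; schematically:

	/* reader: snapshot the racily-updated sysctl exactly once */
	sk->sk_rcvbuf = READ_ONCE(sysctl_rmem_default);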
@@ -2812,11 +2996,11 @@
 
 	if (sock) {
 		sk->sk_type	=	sock->type;
-		sk->sk_wq	=	sock->wq;
+		RCU_INIT_POINTER(sk->sk_wq, &sock->wq);
 		sock->sk	=	sk;
 		sk->sk_uid	=	SOCK_INODE(sock)->i_uid;
 	} else {
-		sk->sk_wq	=	NULL;
+		RCU_INIT_POINTER(sk->sk_wq, NULL);
 		sk->sk_uid	=	make_kuid(sock_net(sk)->user_ns, 0);
 	}
 
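
sk_wq is dereferenced under RCU on the wakeup paths (see the rcu_dereference() in sock_def_readable() above), so the pointer must be published through the RCU API. RCU_INIT_POINTER() is the cheap variant: it skips the write barrier of rcu_assign_pointer(), which is safe here because the socket is still being initialized and is not yet visible to any reader:

	/* init path, no concurrent readers yet: no barrier required */
	RCU_INIT_POINTER(sk->sk_wq, &sock->wq);

	/* wakeup path, under rcu_read_lock(): */
	wq = rcu_dereference(sk->sk_wq);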
@@ -2859,18 +3043,18 @@
 
 #ifdef CONFIG_NET_RX_BUSY_POLL
 	sk->sk_napi_id		=	0;
-	sk->sk_ll_usec		=	sysctl_net_busy_read;
+	sk->sk_ll_usec		=	READ_ONCE(sysctl_net_busy_read);
 #endif
 
-	sk->sk_max_pacing_rate = ~0U;
-	sk->sk_pacing_rate = ~0U;
-	sk->sk_pacing_shift = 10;
+	sk->sk_max_pacing_rate = ~0UL;
+	sk->sk_pacing_rate = ~0UL;
+	WRITE_ONCE(sk->sk_pacing_shift, 10);
 	sk->sk_incoming_cpu = -1;
 
 	sk_rx_queue_clear(sk);
 	/*
 	 * Before updating sk_refcnt, we must commit prior changes to memory
-	 * (Documentation/RCU/rculist_nulls.txt for details)
+	 * (Documentation/RCU/rculist_nulls.rst for details)
 	 */
 	smp_wmb();
 	refcount_set(&sk->sk_refcnt, 1);
@@ -2885,12 +3069,11 @@
 	if (sk->sk_lock.owned)
 		__lock_sock(sk);
 	sk->sk_lock.owned = 1;
-	spin_unlock(&sk->sk_lock.slock);
+	spin_unlock_bh(&sk->sk_lock.slock);
 	/*
 	 * The sk_lock has mutex_lock() semantics here:
 	 */
 	mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
-	local_bh_enable();
 }
 EXPORT_SYMBOL(lock_sock_nested);
 
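
Both this hunk and the lock_sock_fast() hunk below make the same substitution: the spin_unlock() + local_bh_enable() pair, previously split around the lockdep annotation, collapses into a single spin_unlock_bh(). Bottom halves come back on slightly earlier, before mutex_acquire(), but that call is annotation-only, so the locking semantics are unchanged:

	/*
	 * before:                            after:
	 *   spin_unlock(&sk->sk_lock.slock);   spin_unlock_bh(&sk->sk_lock.slock);
	 *   mutex_acquire(...);                mutex_acquire(...);
	 *   local_bh_enable();
	 */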
@@ -2939,51 +3122,55 @@
 
 	__lock_sock(sk);
 	sk->sk_lock.owned = 1;
-	spin_unlock(&sk->sk_lock.slock);
+	spin_unlock_bh(&sk->sk_lock.slock);
 	/*
 	 * The sk_lock has mutex_lock() semantics here:
 	 */
 	mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
-	local_bh_enable();
 	return true;
 }
 EXPORT_SYMBOL(lock_sock_fast);
 
-int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
+int sock_gettstamp(struct socket *sock, void __user *userstamp,
+		   bool timeval, bool time32)
 {
-	struct timeval tv;
+	struct sock *sk = sock->sk;
+	struct timespec64 ts;
 
 	sock_enable_timestamp(sk, SOCK_TIMESTAMP);
-	tv = ktime_to_timeval(sock_read_timestamp(sk));
-	if (tv.tv_sec == -1)
-		return -ENOENT;
-	if (tv.tv_sec == 0) {
-		ktime_t kt = ktime_get_real();
-		sock_write_timestamp(sk, kt);
-		tv = ktime_to_timeval(kt);
-	}
-	return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
-}
-EXPORT_SYMBOL(sock_get_timestamp);
-
-int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
-{
-	struct timespec ts;
-
-	sock_enable_timestamp(sk, SOCK_TIMESTAMP);
-	ts = ktime_to_timespec(sock_read_timestamp(sk));
+	ts = ktime_to_timespec64(sock_read_timestamp(sk));
 	if (ts.tv_sec == -1)
 		return -ENOENT;
 	if (ts.tv_sec == 0) {
 		ktime_t kt = ktime_get_real();
 		sock_write_timestamp(sk, kt);
-		ts = ktime_to_timespec(sk->sk_stamp);
+		ts = ktime_to_timespec64(kt);
 	}
-	return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
-}
-EXPORT_SYMBOL(sock_get_timestampns);
 
-void sock_enable_timestamp(struct sock *sk, int flag)
+	if (timeval)
+		ts.tv_nsec /= 1000;
+
+#ifdef CONFIG_COMPAT_32BIT_TIME
+	if (time32)
+		return put_old_timespec32(&ts, userstamp);
+#endif
+#ifdef CONFIG_SPARC64
+	/* beware of padding in sparc64 timeval */
+	if (timeval && !in_compat_syscall()) {
+		struct __kernel_old_timeval __user tv = {
+			.tv_sec = ts.tv_sec,
+			.tv_usec = ts.tv_nsec,
+		};
+		if (copy_to_user(userstamp, &tv, sizeof(tv)))
+			return -EFAULT;
+		return 0;
+	}
+#endif
+	return put_timespec64(&ts, userstamp);
+}
+EXPORT_SYMBOL(sock_gettstamp);
+
+void sock_enable_timestamp(struct sock *sk, enum sock_flags flag)
 {
 	if (!sock_flag(sk, flag)) {
 		unsigned long previous_flags = sk->sk_flags;
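
sock_gettstamp() folds sock_get_timestamp() and sock_get_timestampns() into one y2038-safe helper: the timestamp is held in a timespec64, `timeval` selects microsecond output (tv_nsec /= 1000), and `time32` selects the legacy 32-bit layout. Roughly how the SIOCGSTAMP ioctl path in net/socket.c is expected to dispatch into it, sketched from memory (the exact form varies by tree):

	case SIOCGSTAMP_OLD:
	case SIOCGSTAMPNS_OLD:
		if (!sock->ops->gettstamp)
			return -ENOIOCTLCMD;
		return sock->ops->gettstamp(sock, argp,
					    cmd == SIOCGSTAMP_OLD,
					    !IS_ENABLED(CONFIG_64BIT));

The CONFIG_SPARC64 branch exists because sparc64's native timeval layout contains padding, so a generic copy would misplace tv_usec for 64-bit callers; hence the explicit __kernel_old_timeval construction.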
@@ -3052,20 +3239,6 @@
 }
 EXPORT_SYMBOL(sock_common_getsockopt);
 
-#ifdef CONFIG_COMPAT
-int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
-				  char __user *optval, int __user *optlen)
-{
-	struct sock *sk = sock->sk;
-
-	if (sk->sk_prot->compat_getsockopt != NULL)
-		return sk->sk_prot->compat_getsockopt(sk, level, optname,
-						      optval, optlen);
-	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
-}
-EXPORT_SYMBOL(compat_sock_common_getsockopt);
-#endif
-
 int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 			int flags)
 {
@@ -3085,7 +3258,7 @@
  *	Set socket options on an inet socket.
  */
 int sock_common_setsockopt(struct socket *sock, int level, int optname,
-			   char __user *optval, unsigned int optlen)
+			   sockptr_t optval, unsigned int optlen)
 {
 	struct sock *sk = sock->sk;
 
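
sockptr_t (from include/linux/sockptr.h) abstracts over user and kernel buffers so a single setsockopt implementation can serve both the syscall path and in-kernel callers; USER_SOCKPTR()/KERNEL_SOCKPTR() build the handle and copy_from_sockptr() performs the appropriate copy. A minimal sketch of reading an int option through it:

	int val;

	if (optlen < sizeof(val))
		return -EINVAL;
	if (copy_from_sockptr(&val, optval, sizeof(val)))
		return -EFAULT;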
@@ -3093,27 +3266,13 @@
 }
 EXPORT_SYMBOL(sock_common_setsockopt);
 
-#ifdef CONFIG_COMPAT
-int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
-				  char __user *optval, unsigned int optlen)
-{
-	struct sock *sk = sock->sk;
-
-	if (sk->sk_prot->compat_setsockopt != NULL)
-		return sk->sk_prot->compat_setsockopt(sk, level, optname,
-						      optval, optlen);
-	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
-}
-EXPORT_SYMBOL(compat_sock_common_setsockopt);
-#endif
-
 void sk_common_release(struct sock *sk)
 {
 	if (sk->sk_prot->destroy)
 		sk->sk_prot->destroy(sk);
 
 	/*
-	 * Observation: when sock_common_release is called, processes have
+	 * Observation: when sk_common_release is called, processes have
 	 * no access to socket. But net still has.
 	 * Step one, detach it from networking:
 	 *
@@ -3149,13 +3308,13 @@
 	memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS);
 
 	mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
-	mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
+	mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf);
 	mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
-	mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
+	mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf);
 	mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
-	mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
+	mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued);
 	mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
-	mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;
+	mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
 	mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
 }
 
@@ -3240,16 +3399,17 @@
 
 core_initcall(net_inuse_init);
 
-static void assign_proto_idx(struct proto *prot)
+static int assign_proto_idx(struct proto *prot)
 {
 	prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
 
 	if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
 		pr_err("PROTO_INUSE_NR exhausted\n");
-		return;
+		return -ENOSPC;
 	}
 
 	set_bit(prot->inuse_idx, proto_inuse_idx);
+	return 0;
 }
 
 static void release_proto_idx(struct proto *prot)
@@ -3258,8 +3418,9 @@
 	clear_bit(prot->inuse_idx, proto_inuse_idx);
 }
 #else
-static inline void assign_proto_idx(struct proto *prot)
+static inline int assign_proto_idx(struct proto *prot)
 {
+	return 0;
 }
 
 static inline void release_proto_idx(struct proto *prot)
@@ -3270,6 +3431,16 @@
 {
 }
 #endif
+
+static void tw_prot_cleanup(struct timewait_sock_ops *twsk_prot)
+{
+	if (!twsk_prot)
+		return;
+	kfree(twsk_prot->twsk_slab_name);
+	twsk_prot->twsk_slab_name = NULL;
+	kmem_cache_destroy(twsk_prot->twsk_slab);
+	twsk_prot->twsk_slab = NULL;
+}
 
 static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
 {
@@ -3308,6 +3479,8 @@
 
 int proto_register(struct proto *prot, int alloc_slab)
 {
+	int ret = -ENOBUFS;
+
 	if (alloc_slab) {
 		prot->slab = kmem_cache_create_usercopy(prot->name,
 					prot->obj_size, 0,
@@ -3339,25 +3512,32 @@
 						  prot->slab_flags,
 						  NULL);
 			if (prot->twsk_prot->twsk_slab == NULL)
-				goto out_free_timewait_sock_slab_name;
+				goto out_free_timewait_sock_slab;
 		}
 	}
 
 	mutex_lock(&proto_list_mutex);
+	ret = assign_proto_idx(prot);
+	if (ret) {
+		mutex_unlock(&proto_list_mutex);
+		goto out_free_timewait_sock_slab;
+	}
 	list_add(&prot->node, &proto_list);
-	assign_proto_idx(prot);
 	mutex_unlock(&proto_list_mutex);
-	return 0;
+	return ret;
 
-out_free_timewait_sock_slab_name:
-	kfree(prot->twsk_prot->twsk_slab_name);
+out_free_timewait_sock_slab:
+	if (alloc_slab && prot->twsk_prot)
+		tw_prot_cleanup(prot->twsk_prot);
 out_free_request_sock_slab:
-	req_prot_cleanup(prot->rsk_prot);
+	if (alloc_slab) {
+		req_prot_cleanup(prot->rsk_prot);
 
-	kmem_cache_destroy(prot->slab);
-	prot->slab = NULL;
+		kmem_cache_destroy(prot->slab);
+		prot->slab = NULL;
+	}
 out:
-	return -ENOBUFS;
+	return ret;
 }
 EXPORT_SYMBOL(proto_register);
 
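
assign_proto_idx() can now fail with -ENOSPC, and proto_register() propagates that alongside the existing -ENOBUFS, taking care to give back only what it allocated (the new `alloc_slab` guards matter because the error labels are now also reachable from the non-alloc path, where there is nothing to free). Callers should already treat registration as fallible; a typical module init, with a hypothetical example_proto:

	static int __init example_init(void)
	{
		int err;

		err = proto_register(&example_proto, 1);
		if (err)	/* -ENOBUFS, or now also -ENOSPC */
			return err;
		/* register proto_ops, etc. */
		return 0;
	}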
@@ -3372,12 +3552,7 @@
 	prot->slab = NULL;
 
 	req_prot_cleanup(prot->rsk_prot);
-
-	if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
-		kmem_cache_destroy(prot->twsk_prot->twsk_slab);
-		kfree(prot->twsk_prot->twsk_slab_name);
-		prot->twsk_prot->twsk_slab = NULL;
-	}
+	tw_prot_cleanup(prot->twsk_prot);
 }
 EXPORT_SYMBOL(proto_unregister);
 
3383 | 3558 | |
---|
.. | .. |
---|
3394 | 3569 | #ifdef CONFIG_INET |
---|
3395 | 3570 | if (family == AF_INET && |
---|
3396 | 3571 | protocol != IPPROTO_RAW && |
---|
| 3572 | + protocol < MAX_INET_PROTOS && |
---|
3397 | 3573 | !rcu_access_pointer(inet_protos[protocol])) |
---|
3398 | 3574 | return -ENOENT; |
---|
3399 | 3575 | #endif |
---|
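
`protocol` here is caller-supplied (the sock_diag netlink path appears to pass a userspace-controlled value straight through), while inet_protos[] has exactly MAX_INET_PROTOS slots, so the added bound check stops an out-of-range read before the array dereference. The array being indexed is declared in include/net/protocol.h:

	#define MAX_INET_PROTOS		256
	extern struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS];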
@@ -3431,7 +3607,7 @@
 	return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
 }
 
-static char *sock_prot_memory_pressure(struct proto *proto)
+static const char *sock_prot_memory_pressure(struct proto *proto)
 {
 	return proto->memory_pressure != NULL ?
 	proto_memory_pressure(proto) ? "yes" : "no" : "NI";
@@ -3535,3 +3711,11 @@
 }
 EXPORT_SYMBOL(sk_busy_loop_end);
 #endif /* CONFIG_NET_RX_BUSY_POLL */
+
+int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len)
+{
+	if (!sk->sk_prot->bind_add)
+		return -EOPNOTSUPP;
+	return sk->sk_prot->bind_add(sk, addr, addr_len);
+}
+EXPORT_SYMBOL(sock_bind_add);
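
sock_bind_add() gives in-kernel callers a front end to an optional per-protocol hook for binding an additional address to an already-bound socket; protocols opt in by filling .bind_add in their struct proto, and everyone else gets -EOPNOTSUPP. Upstream this was added with multi-homed SCTP in mind; a hedged sketch of the wiring (field placement abbreviated):

	struct proto sctp_prot = {
		.name     = "SCTP",
		/* ... other ops ... */
		.bind_add = sctp_bind_add,
	};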