hc
2023-12-09 b22da3d8526a935aa31e086e63f60ff3246cb61c
kernel/net/core/sock.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
@@ -5,7 +6,6 @@
  *
  *		Generic socket support routines. Memory allocators, socket lock/release
  *		handler for protocols to use and generic option handler.
- *
  *
  * Authors:	Ross Biro
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -81,12 +81,6 @@
  *		Arnaldo C. Melo	:	cleanups, use skb_queue_purge
  *
  *		To Fix:
- *
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -119,6 +113,7 @@
 #include <linux/static_key.h>
 #include <linux/memcontrol.h>
 #include <linux/prefetch.h>
+#include <linux/compat.h>
 
 #include <linux/uaccess.h>
 
@@ -137,8 +132,10 @@
 
 #include <linux/filter.h>
 #include <net/sock_reuseport.h>
+#include <net/bpf_sk_storage.h>
 
 #include <trace/events/sock.h>
+#include <trace/hooks/sched.h>
 
 #include <net/tcp.h>
 #include <net/busy_poll.h>
@@ -335,14 +332,66 @@
 }
 EXPORT_SYMBOL(__sk_backlog_rcv);
 
-static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
+static int sock_get_timeout(long timeo, void *optval, bool old_timeval)
 {
-	struct timeval tv;
+	struct __kernel_sock_timeval tv;
 
-	if (optlen < sizeof(tv))
-		return -EINVAL;
-	if (copy_from_user(&tv, optval, sizeof(tv)))
-		return -EFAULT;
+	if (timeo == MAX_SCHEDULE_TIMEOUT) {
+		tv.tv_sec = 0;
+		tv.tv_usec = 0;
+	} else {
+		tv.tv_sec = timeo / HZ;
+		tv.tv_usec = ((timeo % HZ) * USEC_PER_SEC) / HZ;
+	}
+
+	if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
+		struct old_timeval32 tv32 = { tv.tv_sec, tv.tv_usec };
+		*(struct old_timeval32 *)optval = tv32;
+		return sizeof(tv32);
+	}
+
+	if (old_timeval) {
+		struct __kernel_old_timeval old_tv;
+		old_tv.tv_sec = tv.tv_sec;
+		old_tv.tv_usec = tv.tv_usec;
+		*(struct __kernel_old_timeval *)optval = old_tv;
+		return sizeof(old_tv);
+	}
+
+	*(struct __kernel_sock_timeval *)optval = tv;
+	return sizeof(tv);
+}
+
+static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
+			    bool old_timeval)
+{
+	struct __kernel_sock_timeval tv;
+
+	if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
+		struct old_timeval32 tv32;
+
+		if (optlen < sizeof(tv32))
+			return -EINVAL;
+
+		if (copy_from_sockptr(&tv32, optval, sizeof(tv32)))
+			return -EFAULT;
+		tv.tv_sec = tv32.tv_sec;
+		tv.tv_usec = tv32.tv_usec;
+	} else if (old_timeval) {
+		struct __kernel_old_timeval old_tv;
+
+		if (optlen < sizeof(old_tv))
+			return -EINVAL;
+		if (copy_from_sockptr(&old_tv, optval, sizeof(old_tv)))
+			return -EFAULT;
+		tv.tv_sec = old_tv.tv_sec;
+		tv.tv_usec = old_tv.tv_usec;
+	} else {
+		if (optlen < sizeof(tv))
+			return -EINVAL;
+		if (copy_from_sockptr(&tv, optval, sizeof(tv)))
+			return -EFAULT;
+	}
 	if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
 		return -EDOM;
 
@@ -360,21 +409,9 @@
 	*timeo_p = MAX_SCHEDULE_TIMEOUT;
 	if (tv.tv_sec == 0 && tv.tv_usec == 0)
 		return 0;
-	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
-		*timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP(tv.tv_usec, USEC_PER_SEC / HZ);
+	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1))
+		*timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP((unsigned long)tv.tv_usec, USEC_PER_SEC / HZ);
 	return 0;
-}
-
-static void sock_warn_obsolete_bsdism(const char *name)
-{
-	static int warned;
-	static char warncomm[TASK_COMM_LEN];
-	if (strcmp(warncomm, current->comm) && warned < 5) {
-		strcpy(warncomm, current->comm);
-		pr_warn("process `%s' is using obsolete %s SO_BSDCOMPAT\n",
-			warncomm, name);
-		warned++;
-	}
 }
 
 static bool sock_needs_netstamp(const struct sock *sk)
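
Note: SO_RCVTIMEO/SO_SNDTIMEO now come in _OLD and _NEW flavors so 32-bit userspace can pass a y2038-safe struct __kernel_sock_timeval; sock_set_timeout() accepts all three layouts (compat old_timeval32, native __kernel_old_timeval, and the 64-bit form) and sock_get_timeout() mirrors them on the way out. The classic path still works unchanged; a minimal userspace sketch (plain C, Linux host assumed, error handling trimmed):

    #include <stdio.h>
    #include <sys/socket.h>
    #include <sys/time.h>

    int main(void)
    {
        int fd = socket(AF_INET, SOCK_DGRAM, 0);
        struct timeval tv = { .tv_sec = 2, .tv_usec = 500000 };
        socklen_t len = sizeof(tv);

        /* Lands in sock_set_timeout() with old_timeval == true. */
        if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
            perror("setsockopt");

        /* getsockopt() goes through sock_get_timeout() and reports the
         * value after the kernel's rounding to jiffies. */
        if (!getsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, &len))
            printf("timeout: %ld.%06ld s\n",
                   (long)tv.tv_sec, (long)tv.tv_usec);
        return 0;
    }
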
@@ -472,8 +509,8 @@
 
 		rc = sk_backlog_rcv(sk, skb);
 
-		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
-	} else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
+		mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
+	} else if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) {
 		bh_unlock_sock(sk);
 		atomic_inc(&sk->sk_drops);
 		goto discard_and_relse;
@@ -520,19 +557,55 @@
 }
 EXPORT_SYMBOL(sk_dst_check);
 
-static int sock_setbindtodevice(struct sock *sk, char __user *optval,
-				int optlen)
+static int sock_bindtoindex_locked(struct sock *sk, int ifindex)
+{
+	int ret = -ENOPROTOOPT;
+#ifdef CONFIG_NETDEVICES
+	struct net *net = sock_net(sk);
+
+	/* Sorry... */
+	ret = -EPERM;
+	if (sk->sk_bound_dev_if && !ns_capable(net->user_ns, CAP_NET_RAW))
+		goto out;
+
+	ret = -EINVAL;
+	if (ifindex < 0)
+		goto out;
+
+	sk->sk_bound_dev_if = ifindex;
+	if (sk->sk_prot->rehash)
+		sk->sk_prot->rehash(sk);
+	sk_dst_reset(sk);
+
+	ret = 0;
+
+out:
+#endif
+
+	return ret;
+}
+
+int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk)
+{
+	int ret;
+
+	if (lock_sk)
+		lock_sock(sk);
+	ret = sock_bindtoindex_locked(sk, ifindex);
+	if (lock_sk)
+		release_sock(sk);
+
+	return ret;
+}
+EXPORT_SYMBOL(sock_bindtoindex);
+
+static int sock_setbindtodevice(struct sock *sk, sockptr_t optval, int optlen)
 {
 	int ret = -ENOPROTOOPT;
 #ifdef CONFIG_NETDEVICES
 	struct net *net = sock_net(sk);
 	char devname[IFNAMSIZ];
 	int index;
-
-	/* Sorry... */
-	ret = -EPERM;
-	if (!ns_capable(net->user_ns, CAP_NET_RAW))
-		goto out;
 
 	ret = -EINVAL;
 	if (optlen < 0)
@@ -548,7 +621,7 @@
 	memset(devname, 0, sizeof(devname));
 
 	ret = -EFAULT;
-	if (copy_from_user(devname, optval, optlen))
+	if (copy_from_sockptr(devname, optval, optlen))
 		goto out;
 
 	index = 0;
@@ -565,13 +638,7 @@
 		goto out;
 	}
 
-	lock_sock(sk);
-	sk->sk_bound_dev_if = index;
-	sk_dst_reset(sk);
-	release_sock(sk);
-
-	ret = 0;
-
+	return sock_bindtoindex(sk, index, true);
 out:
 #endif
 
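
Note: the rewritten bind-to-device path factors out sock_bindtoindex_locked()/sock_bindtoindex() and relaxes the permission check — CAP_NET_RAW is now needed only to change an existing binding (sk->sk_bound_dev_if already set), and the socket is rehashed after the change. The userspace interface is unchanged; a minimal sketch ("eth0" is a placeholder name):

    #include <string.h>
    #include <sys/socket.h>

    /* Binds fd to one interface; this ends up in sock_bindtoindex(). */
    static int bind_to_dev(int fd, const char *ifname)
    {
        return setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
                          ifname, strlen(ifname));
    }
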
@@ -618,14 +685,6 @@
 	return ret;
 }
 
-static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
-{
-	if (valbool)
-		sock_set_flag(sk, bit);
-	else
-		sock_reset_flag(sk, bit);
-}
-
 bool sk_mc_loop(struct sock *sk)
 {
 	if (dev_recursion_level())
@@ -645,13 +704,133 @@
 }
 EXPORT_SYMBOL(sk_mc_loop);
 
+void sock_set_reuseaddr(struct sock *sk)
+{
+	lock_sock(sk);
+	sk->sk_reuse = SK_CAN_REUSE;
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_reuseaddr);
+
+void sock_set_reuseport(struct sock *sk)
+{
+	lock_sock(sk);
+	sk->sk_reuseport = true;
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_reuseport);
+
+void sock_no_linger(struct sock *sk)
+{
+	lock_sock(sk);
+	sk->sk_lingertime = 0;
+	sock_set_flag(sk, SOCK_LINGER);
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_no_linger);
+
+void sock_set_priority(struct sock *sk, u32 priority)
+{
+	lock_sock(sk);
+	sk->sk_priority = priority;
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_priority);
+
+void sock_set_sndtimeo(struct sock *sk, s64 secs)
+{
+	lock_sock(sk);
+	if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1)
+		sk->sk_sndtimeo = secs * HZ;
+	else
+		sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_sndtimeo);
+
+static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns)
+{
+	if (val) {
+		sock_valbool_flag(sk, SOCK_TSTAMP_NEW, new);
+		sock_valbool_flag(sk, SOCK_RCVTSTAMPNS, ns);
+		sock_set_flag(sk, SOCK_RCVTSTAMP);
+		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
+	} else {
+		sock_reset_flag(sk, SOCK_RCVTSTAMP);
+		sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
+	}
+}
+
+void sock_enable_timestamps(struct sock *sk)
+{
+	lock_sock(sk);
+	__sock_set_timestamps(sk, true, false, true);
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_enable_timestamps);
+
+void sock_set_keepalive(struct sock *sk)
+{
+	lock_sock(sk);
+	if (sk->sk_prot->keepalive)
+		sk->sk_prot->keepalive(sk, true);
+	sock_valbool_flag(sk, SOCK_KEEPOPEN, true);
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_keepalive);
+
+static void __sock_set_rcvbuf(struct sock *sk, int val)
+{
+	/* Ensure val * 2 fits into an int, to prevent max_t() from treating it
+	 * as a negative value.
+	 */
+	val = min_t(int, val, INT_MAX / 2);
+	sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+
+	/* We double it on the way in to account for "struct sk_buff" etc.
+	 * overhead. Applications assume that the SO_RCVBUF setting they make
+	 * will allow that much actual data to be received on that socket.
+	 *
+	 * Applications are unaware that "struct sk_buff" and other overheads
	 * allocate from the receive buffer during socket buffer allocation.
+	 *
+	 * And after considering the possible alternatives, returning the value
+	 * we actually used in getsockopt is the most desirable behavior.
+	 */
+	WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * 2, SOCK_MIN_RCVBUF));
+}
+
+void sock_set_rcvbuf(struct sock *sk, int val)
+{
+	lock_sock(sk);
+	__sock_set_rcvbuf(sk, val);
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_rcvbuf);
+
+static void __sock_set_mark(struct sock *sk, u32 val)
+{
+	if (val != sk->sk_mark) {
+		sk->sk_mark = val;
+		sk_dst_reset(sk);
+	}
+}
+
+void sock_set_mark(struct sock *sk, u32 val)
+{
+	lock_sock(sk);
+	__sock_set_mark(sk, val);
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_mark);
+
 /*
  *	This is meant for all protocols to use and covers goings on
  *	at the socket level. Everything here is generic.
  */
 
 int sock_setsockopt(struct socket *sock, int level, int optname,
-		    char __user *optval, unsigned int optlen)
+		    sockptr_t optval, unsigned int optlen)
 {
 	struct sock_txtime sk_txtime;
 	struct sock *sk = sock->sk;
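
Note: the exported sock_set_*()/sock_no_linger() helpers give in-kernel socket users typed setters, so they no longer need to funnel kernel buffers through the setsockopt() path; sock_setsockopt() itself now takes a sockptr_t that abstracts over kernel and user pointers. A hedged sketch of an in-kernel caller (hypothetical function, not part of this patch):

    /* Hypothetical module code: configure a freshly created kernel socket
     * with the new helpers instead of (kernel_)setsockopt(). */
    static void example_configure(struct socket *sock)
    {
        sock_set_reuseaddr(sock->sk);
        sock_set_keepalive(sock->sk);
        sock_set_rcvbuf(sock->sk, 1 << 20); /* doubled/clamped internally */
    }
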
@@ -670,7 +849,7 @@
 	if (optlen < sizeof(int))
 		return -EINVAL;
 
-	if (get_user(val, (int __user *)optval))
+	if (copy_from_sockptr(&val, optval, sizeof(val)))
 		return -EFAULT;
 
 	valbool = val ? 1 : 0;
@@ -709,10 +888,15 @@
 		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
 		 * are treated in BSD as hints
 		 */
-		val = min_t(u32, val, sysctl_wmem_max);
+		val = min_t(u32, val, READ_ONCE(sysctl_wmem_max));
set_sndbuf:
+		/* Ensure val * 2 fits into an int, to prevent max_t()
+		 * from treating it as a negative value.
+		 */
+		val = min_t(int, val, INT_MAX / 2);
 		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
-		sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
+		WRITE_ONCE(sk->sk_sndbuf,
+			   max_t(int, val * 2, SOCK_MIN_SNDBUF));
 		/* Wake up sending tasks if we upped the value. */
 		sk->sk_write_space(sk);
 		break;
@@ -722,6 +906,12 @@
 			ret = -EPERM;
 			break;
 		}
+
+		/* No negative values (to prevent underflow, as val will be
+		 * multiplied by 2).
+		 */
+		if (val < 0)
+			val = 0;
 		goto set_sndbuf;
 
 	case SO_RCVBUF:
@@ -730,25 +920,7 @@
 		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
 		 * are treated in BSD as hints
 		 */
-		val = min_t(u32, val, sysctl_rmem_max);
-set_rcvbuf:
-		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
-		/*
-		 * We double it on the way in to account for
-		 * "struct sk_buff" etc. overhead.   Applications
-		 * assume that the SO_RCVBUF setting they make will
-		 * allow that much actual data to be received on that
-		 * socket.
-		 *
-		 * Applications are unaware that "struct sk_buff" and
-		 * other overheads allocate from the receive buffer
-		 * during socket buffer allocation.
-		 *
-		 * And after considering the possible alternatives,
-		 * returning the value we actually used in getsockopt
-		 * is the most desirable behavior.
-		 */
-		sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
+		__sock_set_rcvbuf(sk, min_t(u32, val, READ_ONCE(sysctl_rmem_max)));
 		break;
 
 	case SO_RCVBUFFORCE:
@@ -756,7 +928,12 @@
 			ret = -EPERM;
 			break;
 		}
-		goto set_rcvbuf;
+
+		/* No negative values (to prevent underflow, as val will be
+		 * multiplied by 2).
+		 */
+		__sock_set_rcvbuf(sk, max(val, 0));
+		break;
 
 	case SO_KEEPALIVE:
 		if (sk->sk_prot->keepalive)
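
Note: buffer sizing is consolidated here — the requested value is clamped to INT_MAX / 2 before doubling (so val * 2 cannot go negative), negative *FORCE values are clamped to 0, and stores go through WRITE_ONCE() to pair with lockless READ_ONCE() readers. The long-standing doubling semantics are unchanged, as this userspace sketch shows (the exact floor/ceiling depend on SOCK_MIN_RCVBUF and net.core.rmem_max):

    #include <stdio.h>
    #include <sys/socket.h>

    int main(void)
    {
        int fd = socket(AF_INET, SOCK_DGRAM, 0);
        int val = 65536;
        socklen_t len = sizeof(val);

        setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val));
        getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, &len);
        printf("effective rcvbuf: %d\n", val); /* typically 131072 */
        return 0;
    }
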
@@ -785,7 +962,7 @@
 			ret = -EINVAL;	/* 1003.1g */
 			break;
 		}
-		if (copy_from_user(&ling, optval, sizeof(ling))) {
+		if (copy_from_sockptr(&ling, optval, sizeof(ling))) {
 			ret = -EFAULT;
 			break;
 		}
@@ -803,7 +980,6 @@
 		break;
 
 	case SO_BSDCOMPAT:
-		sock_warn_obsolete_bsdism("setsockopt");
 		break;
 
 	case SO_PASSCRED:
@@ -813,22 +989,20 @@
 			clear_bit(SOCK_PASSCRED, &sock->flags);
 		break;
 
-	case SO_TIMESTAMP:
-	case SO_TIMESTAMPNS:
-		if (valbool) {
-			if (optname == SO_TIMESTAMP)
-				sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
-			else
-				sock_set_flag(sk, SOCK_RCVTSTAMPNS);
-			sock_set_flag(sk, SOCK_RCVTSTAMP);
-			sock_enable_timestamp(sk, SOCK_TIMESTAMP);
-		} else {
-			sock_reset_flag(sk, SOCK_RCVTSTAMP);
-			sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
-		}
+	case SO_TIMESTAMP_OLD:
+		__sock_set_timestamps(sk, valbool, false, false);
 		break;
-
-	case SO_TIMESTAMPING:
+	case SO_TIMESTAMP_NEW:
+		__sock_set_timestamps(sk, valbool, true, false);
+		break;
+	case SO_TIMESTAMPNS_OLD:
+		__sock_set_timestamps(sk, valbool, false, true);
+		break;
+	case SO_TIMESTAMPNS_NEW:
+		__sock_set_timestamps(sk, valbool, true, true);
+		break;
+	case SO_TIMESTAMPING_NEW:
+	case SO_TIMESTAMPING_OLD:
 		if (val & ~SOF_TIMESTAMPING_MASK) {
 			ret = -EINVAL;
 			break;
@@ -856,6 +1030,8 @@
 		}
 
 		sk->sk_tsflags = val;
+		sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
+
 		if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
 			sock_enable_timestamp(sk,
 					      SOCK_TIMESTAMPING_RX_SOFTWARE);
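
Note: the timestamp options are split into _OLD/_NEW pairs as part of the y2038 work; __sock_set_timestamps() centralizes the flag juggling, and SOCK_TSTAMP_NEW records which cmsg layout to deliver. A sketch of requesting the new nanosecond receive timestamps (userspace C; SO_TIMESTAMPNS_NEW needs kernel/libc headers from v5.1 or later):

    #include <sys/socket.h>

    /* Timestamps then arrive as SCM_TIMESTAMPNS_NEW control messages
     * carrying a struct __kernel_timespec. */
    static int enable_rx_timestamps(int fd)
    {
        int on = 1;

        return setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
                          &on, sizeof(on));
    }
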
@@ -870,67 +1046,65 @@
 		if (sock->ops->set_rcvlowat)
 			ret = sock->ops->set_rcvlowat(sk, val);
 		else
-			sk->sk_rcvlowat = val ? : 1;
+			WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
 		break;
 
-	case SO_RCVTIMEO:
-		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
+	case SO_RCVTIMEO_OLD:
+	case SO_RCVTIMEO_NEW:
+		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval,
+				       optlen, optname == SO_RCVTIMEO_OLD);
 		break;
 
-	case SO_SNDTIMEO:
-		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
+	case SO_SNDTIMEO_OLD:
+	case SO_SNDTIMEO_NEW:
+		ret = sock_set_timeout(&sk->sk_sndtimeo, optval,
+				       optlen, optname == SO_SNDTIMEO_OLD);
 		break;
 
-	case SO_ATTACH_FILTER:
-		ret = -EINVAL;
-		if (optlen == sizeof(struct sock_fprog)) {
-			struct sock_fprog fprog;
+	case SO_ATTACH_FILTER: {
+		struct sock_fprog fprog;
 
-			ret = -EFAULT;
-			if (copy_from_user(&fprog, optval, sizeof(fprog)))
-				break;
-
+		ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
+		if (!ret)
 			ret = sk_attach_filter(&fprog, sk);
-		}
 		break;
-
+	}
 	case SO_ATTACH_BPF:
 		ret = -EINVAL;
 		if (optlen == sizeof(u32)) {
 			u32 ufd;
 
 			ret = -EFAULT;
-			if (copy_from_user(&ufd, optval, sizeof(ufd)))
+			if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
 				break;
 
 			ret = sk_attach_bpf(ufd, sk);
 		}
 		break;
 
-	case SO_ATTACH_REUSEPORT_CBPF:
-		ret = -EINVAL;
-		if (optlen == sizeof(struct sock_fprog)) {
-			struct sock_fprog fprog;
+	case SO_ATTACH_REUSEPORT_CBPF: {
+		struct sock_fprog fprog;
 
-			ret = -EFAULT;
-			if (copy_from_user(&fprog, optval, sizeof(fprog)))
-				break;
-
+		ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
		if (!ret)
 			ret = sk_reuseport_attach_filter(&fprog, sk);
-		}
 		break;
-
+	}
 	case SO_ATTACH_REUSEPORT_EBPF:
 		ret = -EINVAL;
 		if (optlen == sizeof(u32)) {
 			u32 ufd;
 
 			ret = -EFAULT;
-			if (copy_from_user(&ufd, optval, sizeof(ufd)))
+			if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
 				break;
 
 			ret = sk_reuseport_attach_bpf(ufd, sk);
 		}
+		break;
+
+	case SO_DETACH_REUSEPORT_BPF:
+		ret = reuseport_detach_prog(sk);
 		break;
 
 	case SO_DETACH_FILTER:
@@ -951,10 +1125,12 @@
 		clear_bit(SOCK_PASSSEC, &sock->flags);
 		break;
 	case SO_MARK:
-		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
 			ret = -EPERM;
-		else
-			sk->sk_mark = val;
+			break;
+		}
+
+		__sock_set_mark(sk, val);
 		break;
 
 	case SO_RXQ_OVFL:
@@ -995,15 +1171,23 @@
 #endif
 
 	case SO_MAX_PACING_RATE:
-		if (val != ~0U)
+		{
+		unsigned long ulval = (val == ~0U) ? ~0UL : (unsigned int)val;
+
+		if (sizeof(ulval) != sizeof(val) &&
+		    optlen >= sizeof(ulval) &&
+		    copy_from_sockptr(&ulval, optval, sizeof(ulval))) {
+			ret = -EFAULT;
+			break;
+		}
+		if (ulval != ~0UL)
 			cmpxchg(&sk->sk_pacing_status,
 				SK_PACING_NONE,
 				SK_PACING_NEEDED);
-		sk->sk_max_pacing_rate = val;
-		sk->sk_pacing_rate = min(sk->sk_pacing_rate,
-					 sk->sk_max_pacing_rate);
+		sk->sk_max_pacing_rate = ulval;
+		sk->sk_pacing_rate = min(sk->sk_pacing_rate, ulval);
 		break;
-
+		}
 	case SO_INCOMING_CPU:
 		WRITE_ONCE(sk->sk_incoming_cpu, val);
 		break;
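
Note: sk_max_pacing_rate is widened from u32 to unsigned long — on 64-bit kernels userspace may pass an 8-byte value (rates above ~4 GB/s), while a 4-byte ~0U keeps meaning "unlimited". A sketch of the wide form (userspace C, 64-bit ABI assumed):

    #include <stdint.h>
    #include <sys/socket.h>

    static int set_pacing(int fd, uint64_t bytes_per_sec)
    {
        /* An 8-byte optlen takes the copy_from_sockptr(&ulval, ...) branch. */
        return setsockopt(fd, SOL_SOCKET, SO_MAX_PACING_RATE,
                          &bytes_per_sec, sizeof(bytes_per_sec));
    }
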
@@ -1015,7 +1199,10 @@
 
 	case SO_ZEROCOPY:
 		if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
-			if (sk->sk_protocol != IPPROTO_TCP)
+			if (!((sk->sk_type == SOCK_STREAM &&
+			       sk->sk_protocol == IPPROTO_TCP) ||
+			      (sk->sk_type == SOCK_DGRAM &&
+			       sk->sk_protocol == IPPROTO_UDP)))
 				ret = -ENOTSUPP;
 		} else if (sk->sk_family != PF_RDS) {
 			ret = -ENOTSUPP;
@@ -1029,23 +1216,35 @@
 		break;
 
 	case SO_TXTIME:
-		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
-			ret = -EPERM;
-		} else if (optlen != sizeof(struct sock_txtime)) {
+		if (optlen != sizeof(struct sock_txtime)) {
 			ret = -EINVAL;
-		} else if (copy_from_user(&sk_txtime, optval,
+			break;
+		} else if (copy_from_sockptr(&sk_txtime, optval,
 			   sizeof(struct sock_txtime))) {
 			ret = -EFAULT;
+			break;
 		} else if (sk_txtime.flags & ~SOF_TXTIME_FLAGS_MASK) {
 			ret = -EINVAL;
-		} else {
-			sock_valbool_flag(sk, SOCK_TXTIME, true);
-			sk->sk_clockid = sk_txtime.clockid;
-			sk->sk_txtime_deadline_mode =
-				!!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE);
-			sk->sk_txtime_report_errors =
-				!!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS);
+			break;
 		}
+		/* CLOCK_MONOTONIC is only used by sch_fq, and this packet
+		 * scheduler has enough safe guards.
+		 */
+		if (sk_txtime.clockid != CLOCK_MONOTONIC &&
+		    !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
+			ret = -EPERM;
+			break;
+		}
+		sock_valbool_flag(sk, SOCK_TXTIME, true);
+		sk->sk_clockid = sk_txtime.clockid;
+		sk->sk_txtime_deadline_mode =
+			!!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE);
+		sk->sk_txtime_report_errors =
+			!!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS);
+		break;
+
+	case SO_BINDTOIFINDEX:
+		ret = sock_bindtoindex_locked(sk, val);
 		break;
 
 	default:
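
Note: two functional changes in this hunk — SO_TXTIME no longer demands CAP_NET_ADMIN when the clock is CLOCK_MONOTONIC (sch_fq is deemed to have sufficient guards), and the new SO_BINDTOIFINDEX binds by interface index without the name lookup of SO_BINDTODEVICE. A sketch of the latter (userspace C; SO_BINDTOIFINDEX needs v5.0+ headers):

    #include <net/if.h>
    #include <sys/socket.h>

    static int bind_to_ifindex(int fd, const char *ifname)
    {
        int idx = if_nametoindex(ifname);

        if (!idx)
            return -1;
        return setsockopt(fd, SOL_SOCKET, SO_BINDTOIFINDEX,
                          &idx, sizeof(idx));
    }
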
@@ -1101,8 +1300,11 @@
 	union {
 		int val;
 		u64 val64;
+		unsigned long ulval;
 		struct linger ling;
-		struct timeval tm;
+		struct old_timeval32 tm32;
+		struct __kernel_old_timeval tm;
+		struct __kernel_sock_timeval stm;
 		struct sock_txtime txtime;
 	} v;
 
@@ -1186,42 +1388,38 @@
 		break;
 
 	case SO_BSDCOMPAT:
-		sock_warn_obsolete_bsdism("getsockopt");
 		break;
 
-	case SO_TIMESTAMP:
+	case SO_TIMESTAMP_OLD:
 		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
+				!sock_flag(sk, SOCK_TSTAMP_NEW) &&
 				!sock_flag(sk, SOCK_RCVTSTAMPNS);
 		break;
 
-	case SO_TIMESTAMPNS:
-		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
+	case SO_TIMESTAMPNS_OLD:
+		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && !sock_flag(sk, SOCK_TSTAMP_NEW);
 		break;
 
-	case SO_TIMESTAMPING:
+	case SO_TIMESTAMP_NEW:
+		v.val = sock_flag(sk, SOCK_RCVTSTAMP) && sock_flag(sk, SOCK_TSTAMP_NEW);
+		break;
+
+	case SO_TIMESTAMPNS_NEW:
+		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && sock_flag(sk, SOCK_TSTAMP_NEW);
+		break;
+
+	case SO_TIMESTAMPING_OLD:
 		v.val = sk->sk_tsflags;
 		break;
 
-	case SO_RCVTIMEO:
-		lv = sizeof(struct timeval);
-		if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
-			v.tm.tv_sec = 0;
-			v.tm.tv_usec = 0;
-		} else {
-			v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
-			v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * USEC_PER_SEC) / HZ;
-		}
+	case SO_RCVTIMEO_OLD:
+	case SO_RCVTIMEO_NEW:
+		lv = sock_get_timeout(sk->sk_rcvtimeo, &v, SO_RCVTIMEO_OLD == optname);
 		break;
 
-	case SO_SNDTIMEO:
-		lv = sizeof(struct timeval);
-		if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
-			v.tm.tv_sec = 0;
-			v.tm.tv_usec = 0;
-		} else {
-			v.tm.tv_sec = sk->sk_sndtimeo / HZ;
-			v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * USEC_PER_SEC) / HZ;
-		}
+	case SO_SNDTIMEO_OLD:
+	case SO_SNDTIMEO_NEW:
+		lv = sock_get_timeout(sk->sk_sndtimeo, &v, SO_SNDTIMEO_OLD == optname);
 		break;
 
 	case SO_RCVLOWAT:
@@ -1354,7 +1552,13 @@
 #endif
 
 	case SO_MAX_PACING_RATE:
-		v.val = sk->sk_max_pacing_rate;
+		if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) {
+			lv = sizeof(v.ulval);
+			v.ulval = sk->sk_max_pacing_rate;
+		} else {
+			/* 32bit version */
+			v.val = min_t(unsigned long, sk->sk_max_pacing_rate, ~0U);
+		}
 		break;
 
 	case SO_INCOMING_CPU:
@@ -1405,6 +1609,10 @@
 			SOF_TXTIME_REPORT_ERRORS : 0;
 		break;
 
+	case SO_BINDTOIFINDEX:
+		v.val = sk->sk_bound_dev_if;
+		break;
+
 	default:
 		/* We implement the SO_SNDLOWAT etc to not be settable
 		 * (1003.1g 7).
@@ -1452,13 +1660,14 @@
  */
 static void sock_copy(struct sock *nsk, const struct sock *osk)
 {
+	const struct proto *prot = READ_ONCE(osk->sk_prot);
 #ifdef CONFIG_SECURITY_NETWORK
 	void *sptr = nsk->sk_security;
 #endif
 	memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
 
 	memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
-	       osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
+	       prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
 
 #ifdef CONFIG_SECURITY_NETWORK
 	nsk->sk_security = sptr;
@@ -1584,6 +1793,10 @@
 
 	sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
 
+#ifdef CONFIG_BPF_SYSCALL
+	bpf_sk_storage_free(sk);
+#endif
+
 	if (atomic_read(&sk->sk_omem_alloc))
 		pr_debug("%s: optmem leakage (%d bytes) detected\n",
			 __func__, atomic_read(&sk->sk_omem_alloc));
@@ -1670,112 +1883,121 @@
  */
 struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 {
-	struct sock *newsk;
+	struct proto *prot = READ_ONCE(sk->sk_prot);
+	struct sk_filter *filter;
 	bool is_charged = true;
+	struct sock *newsk;
 
-	newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
-	if (newsk != NULL) {
-		struct sk_filter *filter;
+	newsk = sk_prot_alloc(prot, priority, sk->sk_family);
+	if (!newsk)
+		goto out;
 
-		sock_copy(newsk, sk);
+	sock_copy(newsk, sk);
 
-		newsk->sk_prot_creator = sk->sk_prot;
+	newsk->sk_prot_creator = prot;
 
-		/* SANITY */
-		if (likely(newsk->sk_net_refcnt))
-			get_net(sock_net(newsk));
-		sk_node_init(&newsk->sk_node);
-		sock_lock_init(newsk);
-		bh_lock_sock(newsk);
-		newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
-		newsk->sk_backlog.len = 0;
-
-		atomic_set(&newsk->sk_rmem_alloc, 0);
-		/*
-		 * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
-		 */
-		refcount_set(&newsk->sk_wmem_alloc, 1);
-		atomic_set(&newsk->sk_omem_alloc, 0);
-		sk_init_common(newsk);
-
-		newsk->sk_dst_cache = NULL;
-		newsk->sk_dst_pending_confirm = 0;
-		newsk->sk_wmem_queued = 0;
-		newsk->sk_forward_alloc = 0;
-		atomic_set(&newsk->sk_drops, 0);
-		newsk->sk_send_head = NULL;
-		newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
-		atomic_set(&newsk->sk_zckey, 0);
-
-		sock_reset_flag(newsk, SOCK_DONE);
-
-		/* sk->sk_memcg will be populated at accept() time */
-		newsk->sk_memcg = NULL;
-
-		cgroup_sk_clone(&newsk->sk_cgrp_data);
-
-		rcu_read_lock();
-		filter = rcu_dereference(sk->sk_filter);
-		if (filter != NULL)
-			/* though it's an empty new sock, the charging may fail
-			 * if sysctl_optmem_max was changed between creation of
-			 * original socket and cloning
-			 */
-			is_charged = sk_filter_charge(newsk, filter);
-		RCU_INIT_POINTER(newsk->sk_filter, filter);
-		rcu_read_unlock();
-
-		if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
-			/* We need to make sure that we don't uncharge the new
-			 * socket if we couldn't charge it in the first place
-			 * as otherwise we uncharge the parent's filter.
-			 */
-			if (!is_charged)
-				RCU_INIT_POINTER(newsk->sk_filter, NULL);
-			sk_free_unlock_clone(newsk);
-			newsk = NULL;
-			goto out;
-		}
-		RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
-
-		newsk->sk_err = 0;
-		newsk->sk_err_soft = 0;
-		newsk->sk_priority = 0;
-		newsk->sk_incoming_cpu = raw_smp_processor_id();
-		atomic64_set(&newsk->sk_cookie, 0);
-		if (likely(newsk->sk_net_refcnt))
-			sock_inuse_add(sock_net(newsk), 1);
-
-		/*
-		 * Before updating sk_refcnt, we must commit prior changes to memory
-		 * (Documentation/RCU/rculist_nulls.txt for details)
-		 */
-		smp_wmb();
-		refcount_set(&newsk->sk_refcnt, 2);
-
-		/*
-		 * Increment the counter in the same struct proto as the master
-		 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
-		 * is the same as sk->sk_prot->socks, as this field was copied
-		 * with memcpy).
-		 *
-		 * This _changes_ the previous behaviour, where
-		 * tcp_create_openreq_child always was incrementing the
-		 * equivalent to tcp_prot->socks (inet_sock_nr), so this have
-		 * to be taken into account in all callers. -acme
-		 */
-		sk_refcnt_debug_inc(newsk);
-		sk_set_socket(newsk, NULL);
-		sk_tx_queue_clear(newsk);
-		newsk->sk_wq = NULL;
-
-		if (newsk->sk_prot->sockets_allocated)
-			sk_sockets_allocated_inc(newsk);
-
-		if (sock_needs_netstamp(sk) &&
-		    newsk->sk_flags & SK_FLAGS_TIMESTAMP)
-			net_enable_timestamp();
+	/* SANITY */
+	if (likely(newsk->sk_net_refcnt)) {
+		get_net(sock_net(newsk));
+		sock_inuse_add(sock_net(newsk), 1);
 	}
+	sk_node_init(&newsk->sk_node);
+	sock_lock_init(newsk);
+	bh_lock_sock(newsk);
+	newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
+	newsk->sk_backlog.len = 0;
+
+	atomic_set(&newsk->sk_rmem_alloc, 0);
+
+	/* sk_wmem_alloc set to one (see sk_free() and sock_wfree()) */
+	refcount_set(&newsk->sk_wmem_alloc, 1);
+
+	atomic_set(&newsk->sk_omem_alloc, 0);
+	sk_init_common(newsk);
+
+	newsk->sk_dst_cache = NULL;
+	newsk->sk_dst_pending_confirm = 0;
+	newsk->sk_wmem_queued = 0;
+	newsk->sk_forward_alloc = 0;
+	atomic_set(&newsk->sk_drops, 0);
+	newsk->sk_send_head = NULL;
+	newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
+	atomic_set(&newsk->sk_zckey, 0);
+
+	sock_reset_flag(newsk, SOCK_DONE);
+
+	/* sk->sk_memcg will be populated at accept() time */
+	newsk->sk_memcg = NULL;
+
+	cgroup_sk_clone(&newsk->sk_cgrp_data);
+
+	rcu_read_lock();
+	filter = rcu_dereference(sk->sk_filter);
+	if (filter != NULL)
+		/* though it's an empty new sock, the charging may fail
+		 * if sysctl_optmem_max was changed between creation of
+		 * original socket and cloning
+		 */
+		is_charged = sk_filter_charge(newsk, filter);
+	RCU_INIT_POINTER(newsk->sk_filter, filter);
+	rcu_read_unlock();
+
+	if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
+		/* We need to make sure that we don't uncharge the new
+		 * socket if we couldn't charge it in the first place
+		 * as otherwise we uncharge the parent's filter.
+		 */
+		if (!is_charged)
+			RCU_INIT_POINTER(newsk->sk_filter, NULL);
+		sk_free_unlock_clone(newsk);
+		newsk = NULL;
+		goto out;
+	}
+	RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
+
+	if (bpf_sk_storage_clone(sk, newsk)) {
+		sk_free_unlock_clone(newsk);
+		newsk = NULL;
+		goto out;
+	}
+
+	/* Clear sk_user_data if parent had the pointer tagged
+	 * as not suitable for copying when cloning.
+	 */
+	if (sk_user_data_is_nocopy(newsk))
+		newsk->sk_user_data = NULL;
+
+	newsk->sk_err = 0;
+	newsk->sk_err_soft = 0;
+	newsk->sk_priority = 0;
+	newsk->sk_incoming_cpu = raw_smp_processor_id();
+
+	/* Before updating sk_refcnt, we must commit prior changes to memory
+	 * (Documentation/RCU/rculist_nulls.rst for details)
+	 */
+	smp_wmb();
+	refcount_set(&newsk->sk_refcnt, 2);
+
+	/* Increment the counter in the same struct proto as the master
+	 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
+	 * is the same as sk->sk_prot->socks, as this field was copied
+	 * with memcpy).
+	 *
+	 * This _changes_ the previous behaviour, where
+	 * tcp_create_openreq_child always was incrementing the
+	 * equivalent to tcp_prot->socks (inet_sock_nr), so this have
+	 * to be taken into account in all callers. -acme
+	 */
+	sk_refcnt_debug_inc(newsk);
+	sk_set_socket(newsk, NULL);
+	sk_tx_queue_clear(newsk);
+	RCU_INIT_POINTER(newsk->sk_wq, NULL);
+
+	if (newsk->sk_prot->sockets_allocated)
+		sk_sockets_allocated_inc(newsk);
+
+	if (sock_needs_netstamp(sk) && newsk->sk_flags & SK_FLAGS_TIMESTAMP)
+		net_enable_timestamp();
 out:
 	return newsk;
 }
@@ -1877,6 +2099,19 @@
 }
 EXPORT_SYMBOL(skb_set_owner_w);
 
+static bool can_skb_orphan_partial(const struct sk_buff *skb)
+{
+#ifdef CONFIG_TLS_DEVICE
+	/* Drivers depend on in-order delivery for crypto offload,
+	 * partial orphan breaks out-of-order-OK logic.
+	 */
+	if (skb->decrypted)
+		return false;
+#endif
+	return (skb->destructor == sock_wfree ||
+		(IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree));
+}
+
 /* This helper is used by netem, as it can hold packets in its
  * delay queue. We want to allow the owner socket to send more
  * packets, as if they were already TX completed by a typical driver.
@@ -1888,20 +2123,10 @@
 	if (skb_is_tcp_pure_ack(skb))
 		return;
 
-	if (skb->destructor == sock_wfree
-#ifdef CONFIG_INET
-	    || skb->destructor == tcp_wfree
-#endif
-		) {
-		struct sock *sk = skb->sk;
+	if (can_skb_orphan_partial(skb) && skb_set_owner_sk_safe(skb, skb->sk))
+		return;
 
-		if (refcount_inc_not_zero(&sk->sk_refcnt)) {
-			WARN_ON(refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc));
-			skb->destructor = sock_efree;
-		}
-	} else {
-		skb_orphan(skb);
-	}
+	skb_orphan(skb);
 }
 EXPORT_SYMBOL(skb_orphan_partial);
 
@@ -1927,6 +2152,18 @@
 	sock_put(skb->sk);
 }
 EXPORT_SYMBOL(sock_efree);
+
+/* Buffer destructor for prefetch/receive path where reference count may
+ * not be held, e.g. for listen sockets.
+ */
+#ifdef CONFIG_INET
+void sock_pfree(struct sk_buff *skb)
+{
+	if (sk_is_refcounted(skb->sk))
+		sock_gen_put(skb->sk);
+}
+EXPORT_SYMBOL(sock_pfree);
+#endif /* CONFIG_INET */
 
 kuid_t sock_i_uid(struct sock *sk)
 {
@@ -1956,8 +2193,10 @@
 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
 			     gfp_t priority)
 {
-	if (force || refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
+	if (force ||
+	    refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf)) {
 		struct sk_buff *skb = alloc_skb(size, priority);
+
 		if (skb) {
 			skb_set_owner_w(skb, sk);
 			return skb;
@@ -1981,7 +2220,7 @@
 
 	/* small safe race: SKB_TRUESIZE may differ from final skb->truesize */
 	if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
-	    sysctl_optmem_max)
+	    READ_ONCE(sysctl_optmem_max))
 		return NULL;
 
 	skb = alloc_skb(size, priority);
@@ -1999,8 +2238,10 @@
  */
 void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
 {
-	if ((unsigned int)size <= sysctl_optmem_max &&
-	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
+	int optmem_max = READ_ONCE(sysctl_optmem_max);
+
+	if ((unsigned int)size <= optmem_max &&
+	    atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
 		void *mem;
 		/* First do the add, to avoid the race if kmalloc
 		 * might sleep.
@@ -2025,7 +2266,7 @@
 	if (WARN_ON_ONCE(!mem))
 		return;
 	if (nullify)
-		kzfree(mem);
+		kfree_sensitive(mem);
 	else
 		kfree(mem);
 	atomic_sub(size, &sk->sk_omem_alloc);
@@ -2058,7 +2299,7 @@
 			break;
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-		if (refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
+		if (refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf))
 			break;
 		if (sk->sk_shutdown & SEND_SHUTDOWN)
 			break;
@@ -2093,7 +2334,7 @@
 		if (sk->sk_shutdown & SEND_SHUTDOWN)
 			goto failure;
 
-		if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf)
+		if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf))
 			break;
 
 		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
@@ -2139,7 +2380,7 @@
 			return -EINVAL;
 		sockc->mark = *(u32 *)CMSG_DATA(cmsg);
 		break;
-	case SO_TIMESTAMPING:
+	case SO_TIMESTAMPING_OLD:
 		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
 			return -EINVAL;
 
@@ -2207,8 +2448,8 @@
 	}
 }
 
-/* On 32bit arches, an skb frag is limited to 2^15 */
 #define SKB_FRAG_PAGE_ORDER	get_order(32768)
+DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);
 
 /**
  * skb_page_frag_refill - check that a page_frag contains enough room
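
Note: net_high_order_alloc_disable_key is a static branch wired up (elsewhere in this series) to the net.core.high_order_alloc_disable sysctl; when enabled, skb_page_frag_refill() below skips the order-3 page attempt and falls straight back to single pages.
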
@@ -2233,7 +2474,8 @@
 	}
 
 	pfrag->offset = 0;
-	if (SKB_FRAG_PAGE_ORDER) {
+	if (SKB_FRAG_PAGE_ORDER &&
+	    !static_branch_unlikely(&net_high_order_alloc_disable_key)) {
 		/* Avoid direct reclaim but allow kswapd to wake */
 		pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
 					  __GFP_COMP | __GFP_NOWARN |
@@ -2263,67 +2505,6 @@
 	return false;
 }
 EXPORT_SYMBOL(sk_page_frag_refill);
-
-int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
-		int sg_start, int *sg_curr_index, unsigned int *sg_curr_size,
-		int first_coalesce)
-{
-	int sg_curr = *sg_curr_index, use = 0, rc = 0;
-	unsigned int size = *sg_curr_size;
-	struct page_frag *pfrag;
-	struct scatterlist *sge;
-
-	len -= size;
-	pfrag = sk_page_frag(sk);
-
-	while (len > 0) {
-		unsigned int orig_offset;
-
-		if (!sk_page_frag_refill(sk, pfrag)) {
-			rc = -ENOMEM;
-			goto out;
-		}
-
-		use = min_t(int, len, pfrag->size - pfrag->offset);
-
-		if (!sk_wmem_schedule(sk, use)) {
-			rc = -ENOMEM;
-			goto out;
-		}
-
-		sk_mem_charge(sk, use);
-		size += use;
-		orig_offset = pfrag->offset;
-		pfrag->offset += use;
-
-		sge = sg + sg_curr - 1;
-		if (sg_curr > first_coalesce && sg_page(sge) == pfrag->page &&
-		    sge->offset + sge->length == orig_offset) {
-			sge->length += use;
-		} else {
-			sge = sg + sg_curr;
-			sg_unmark_end(sge);
-			sg_set_page(sge, pfrag->page, use, orig_offset);
-			get_page(pfrag->page);
-			sg_curr++;
-
-			if (sg_curr == MAX_SKB_FRAGS)
-				sg_curr = 0;
-
-			if (sg_curr == sg_start) {
-				rc = -ENOSPC;
-				break;
-			}
-		}
-
-		len -= use;
-	}
-out:
-	*sg_curr_size = size;
-	*sg_curr_index = sg_curr;
-	return rc;
-}
-EXPORT_SYMBOL(sk_alloc_sg);
 
 static void __lock_sock(struct sock *sk)
 	__releases(&sk->sk_lock.slock)
@@ -2358,7 +2539,7 @@
 		next = skb->next;
 		prefetch(next);
 		WARN_ON_ONCE(skb_dst_is_noref(skb));
-		skb->next = NULL;
+		skb_mark_not_on_list(skb);
 		sk_backlog_rcv(sk, skb);
 
 		cond_resched();
@@ -2614,20 +2795,6 @@
 }
 EXPORT_SYMBOL(sock_no_shutdown);
 
-int sock_no_setsockopt(struct socket *sock, int level, int optname,
-		       char __user *optval, unsigned int optlen)
-{
-	return -EOPNOTSUPP;
-}
-EXPORT_SYMBOL(sock_no_setsockopt);
-
-int sock_no_getsockopt(struct socket *sock, int level, int optname,
-		       char __user *optval, int __user *optlen)
-{
-	return -EOPNOTSUPP;
-}
-EXPORT_SYMBOL(sock_no_getsockopt);
-
 int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
 {
 	return -EOPNOTSUPP;
@@ -2732,15 +2899,25 @@
 	rcu_read_unlock();
 }
 
-static void sock_def_readable(struct sock *sk)
+void sock_def_readable(struct sock *sk)
 {
 	struct socket_wq *wq;
 
 	rcu_read_lock();
 	wq = rcu_dereference(sk->sk_wq);
-	if (skwq_has_sleeper(wq))
+
+	if (skwq_has_sleeper(wq)) {
+		int done = 0;
+
+		trace_android_vh_do_wake_up_sync(&wq->wait, &done);
+		if (done)
+			goto out;
+
 		wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI |
 						EPOLLRDNORM | EPOLLRDBAND);
+	}
+
+out:
 	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
 	rcu_read_unlock();
 }
@@ -2754,7 +2931,7 @@
 	/* Do not wake up a writer until he can make "significant"
 	 * progress.  --DaveM
 	 */
-	if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
+	if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= READ_ONCE(sk->sk_sndbuf)) {
 		wq = rcu_dereference(sk->sk_wq);
 		if (skwq_has_sleeper(wq))
 			wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
@@ -2795,6 +2972,13 @@
 }
 EXPORT_SYMBOL(sk_stop_timer);
 
+void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer)
+{
+	if (del_timer_sync(timer))
+		__sock_put(sk);
+}
+EXPORT_SYMBOL(sk_stop_timer_sync);
+
 void sock_init_data(struct socket *sock, struct sock *sk)
 {
 	sk_init_common(sk);
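
Note: sk_stop_timer_sync() mirrors sk_stop_timer() but uses del_timer_sync(), so it returns only once a concurrently running timer callback has finished; callers tearing a socket down from process context use it to drop the timer's socket reference safely.
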
@@ -2803,8 +2987,8 @@
 	timer_setup(&sk->sk_timer, NULL, 0);
 
 	sk->sk_allocation	=	GFP_KERNEL;
-	sk->sk_rcvbuf		=	sysctl_rmem_default;
-	sk->sk_sndbuf		=	sysctl_wmem_default;
+	sk->sk_rcvbuf		=	READ_ONCE(sysctl_rmem_default);
+	sk->sk_sndbuf		=	READ_ONCE(sysctl_wmem_default);
 	sk->sk_state		=	TCP_CLOSE;
 	sk_set_socket(sk, sock);
 
@@ -2812,11 +2996,11 @@
 
 	if (sock) {
 		sk->sk_type	=	sock->type;
-		sk->sk_wq	=	sock->wq;
+		RCU_INIT_POINTER(sk->sk_wq, &sock->wq);
 		sock->sk	=	sk;
 		sk->sk_uid	=	SOCK_INODE(sock)->i_uid;
 	} else {
-		sk->sk_wq	=	NULL;
+		RCU_INIT_POINTER(sk->sk_wq, NULL);
 		sk->sk_uid	=	make_kuid(sock_net(sk)->user_ns, 0);
 	}
 
@@ -2859,18 +3043,18 @@
 
 #ifdef CONFIG_NET_RX_BUSY_POLL
 	sk->sk_napi_id		=	0;
-	sk->sk_ll_usec		=	sysctl_net_busy_read;
+	sk->sk_ll_usec		=	READ_ONCE(sysctl_net_busy_read);
 #endif
 
-	sk->sk_max_pacing_rate = ~0U;
-	sk->sk_pacing_rate = ~0U;
-	sk->sk_pacing_shift = 10;
+	sk->sk_max_pacing_rate = ~0UL;
+	sk->sk_pacing_rate = ~0UL;
+	WRITE_ONCE(sk->sk_pacing_shift, 10);
 	sk->sk_incoming_cpu = -1;
 
 	sk_rx_queue_clear(sk);
 	/*
 	 * Before updating sk_refcnt, we must commit prior changes to memory
-	 * (Documentation/RCU/rculist_nulls.txt for details)
+	 * (Documentation/RCU/rculist_nulls.rst for details)
 	 */
 	smp_wmb();
 	refcount_set(&sk->sk_refcnt, 1);
@@ -2885,12 +3069,11 @@
 	if (sk->sk_lock.owned)
 		__lock_sock(sk);
 	sk->sk_lock.owned = 1;
-	spin_unlock(&sk->sk_lock.slock);
+	spin_unlock_bh(&sk->sk_lock.slock);
 	/*
 	 * The sk_lock has mutex_lock() semantics here:
 	 */
 	mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
-	local_bh_enable();
 }
 EXPORT_SYMBOL(lock_sock_nested);
 
@@ -2939,51 +3122,55 @@
 
 	__lock_sock(sk);
 	sk->sk_lock.owned = 1;
-	spin_unlock(&sk->sk_lock.slock);
+	spin_unlock_bh(&sk->sk_lock.slock);
 	/*
 	 * The sk_lock has mutex_lock() semantics here:
 	 */
 	mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
-	local_bh_enable();
 	return true;
 }
 EXPORT_SYMBOL(lock_sock_fast);
 
-int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
+int sock_gettstamp(struct socket *sock, void __user *userstamp,
+		   bool timeval, bool time32)
 {
-	struct timeval tv;
+	struct sock *sk = sock->sk;
+	struct timespec64 ts;
 
 	sock_enable_timestamp(sk, SOCK_TIMESTAMP);
-	tv = ktime_to_timeval(sock_read_timestamp(sk));
-	if (tv.tv_sec == -1)
-		return -ENOENT;
-	if (tv.tv_sec == 0) {
-		ktime_t kt = ktime_get_real();
-		sock_write_timestamp(sk, kt);
-		tv = ktime_to_timeval(kt);
-	}
-	return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
-}
-EXPORT_SYMBOL(sock_get_timestamp);
-
-int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
-{
-	struct timespec ts;
-
-	sock_enable_timestamp(sk, SOCK_TIMESTAMP);
-	ts = ktime_to_timespec(sock_read_timestamp(sk));
+	ts = ktime_to_timespec64(sock_read_timestamp(sk));
 	if (ts.tv_sec == -1)
 		return -ENOENT;
 	if (ts.tv_sec == 0) {
 		ktime_t kt = ktime_get_real();
 		sock_write_timestamp(sk, kt);
-		ts = ktime_to_timespec(sk->sk_stamp);
+		ts = ktime_to_timespec64(kt);
 	}
-	return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
-}
-EXPORT_SYMBOL(sock_get_timestampns);
 
-void sock_enable_timestamp(struct sock *sk, int flag)
+	if (timeval)
+		ts.tv_nsec /= 1000;
+
+#ifdef CONFIG_COMPAT_32BIT_TIME
+	if (time32)
+		return put_old_timespec32(&ts, userstamp);
+#endif
+#ifdef CONFIG_SPARC64
+	/* beware of padding in sparc64 timeval */
+	if (timeval && !in_compat_syscall()) {
+		struct __kernel_old_timeval __user tv = {
+			.tv_sec = ts.tv_sec,
+			.tv_usec = ts.tv_nsec,
+		};
+		if (copy_to_user(userstamp, &tv, sizeof(tv)))
+			return -EFAULT;
+		return 0;
+	}
+#endif
+	return put_timespec64(&ts, userstamp);
+}
+EXPORT_SYMBOL(sock_gettstamp);
+
+void sock_enable_timestamp(struct sock *sk, enum sock_flags flag)
 {
 	if (!sock_flag(sk, flag)) {
 		unsigned long previous_flags = sk->sk_flags;
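
Note: sock_gettstamp() folds the old sock_get_timestamp()/sock_get_timestampns() pair into one helper parameterized on (timeval, time32), serving the SIOCGSTAMP{,NS} ioctls in both their old and new layouts. The classic ioctl still works; a userspace sketch:

    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <sys/time.h>
    #include <linux/sockios.h>

    /* After a packet has been received on fd, fetch its kernel receive
     * timestamp; this path ends in sock_gettstamp(). */
    static void print_last_rx_stamp(int fd)
    {
        struct timeval tv;

        if (!ioctl(fd, SIOCGSTAMP, &tv))
            printf("last rx: %ld.%06ld\n",
                   (long)tv.tv_sec, (long)tv.tv_usec);
    }
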
@@ -3052,20 +3239,6 @@
 }
 EXPORT_SYMBOL(sock_common_getsockopt);
 
-#ifdef CONFIG_COMPAT
-int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
-				  char __user *optval, int __user *optlen)
-{
-	struct sock *sk = sock->sk;
-
-	if (sk->sk_prot->compat_getsockopt != NULL)
-		return sk->sk_prot->compat_getsockopt(sk, level, optname,
-						      optval, optlen);
-	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
-}
-EXPORT_SYMBOL(compat_sock_common_getsockopt);
-#endif
-
 int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 			int flags)
 {
@@ -3085,7 +3258,7 @@
  *	Set socket options on an inet socket.
  */
 int sock_common_setsockopt(struct socket *sock, int level, int optname,
-			   char __user *optval, unsigned int optlen)
+			   sockptr_t optval, unsigned int optlen)
 {
 	struct sock *sk = sock->sk;
 
@@ -3093,27 +3266,13 @@
 }
 EXPORT_SYMBOL(sock_common_setsockopt);
 
-#ifdef CONFIG_COMPAT
-int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
-				  char __user *optval, unsigned int optlen)
-{
-	struct sock *sk = sock->sk;
-
-	if (sk->sk_prot->compat_setsockopt != NULL)
-		return sk->sk_prot->compat_setsockopt(sk, level, optname,
-						      optval, optlen);
-	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
-}
-EXPORT_SYMBOL(compat_sock_common_setsockopt);
-#endif
-
 void sk_common_release(struct sock *sk)
 {
 	if (sk->sk_prot->destroy)
 		sk->sk_prot->destroy(sk);
 
 	/*
-	 * Observation: when sock_common_release is called, processes have
+	 * Observation: when sk_common_release is called, processes have
 	 * no access to socket. But net still has.
 	 * Step one, detach it from networking:
 	 *
@@ -3149,13 +3308,13 @@
 	memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS);
 
 	mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
-	mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
+	mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf);
 	mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
-	mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
+	mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf);
 	mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
-	mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
+	mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued);
 	mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
-	mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;
+	mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
 	mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
 }
 
@@ -3240,16 +3399,17 @@
 
 core_initcall(net_inuse_init);
 
-static void assign_proto_idx(struct proto *prot)
+static int assign_proto_idx(struct proto *prot)
 {
 	prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
 
 	if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
 		pr_err("PROTO_INUSE_NR exhausted\n");
-		return;
+		return -ENOSPC;
 	}
 
 	set_bit(prot->inuse_idx, proto_inuse_idx);
+	return 0;
 }
 
 static void release_proto_idx(struct proto *prot)
@@ -3258,8 +3418,9 @@
 	clear_bit(prot->inuse_idx, proto_inuse_idx);
 }
 #else
-static inline void assign_proto_idx(struct proto *prot)
+static inline int assign_proto_idx(struct proto *prot)
 {
+	return 0;
 }
 
 static inline void release_proto_idx(struct proto *prot)
@@ -3270,6 +3431,16 @@
 {
 }
 #endif
+
+static void tw_prot_cleanup(struct timewait_sock_ops *twsk_prot)
+{
+	if (!twsk_prot)
+		return;
+	kfree(twsk_prot->twsk_slab_name);
+	twsk_prot->twsk_slab_name = NULL;
+	kmem_cache_destroy(twsk_prot->twsk_slab);
+	twsk_prot->twsk_slab = NULL;
+}
 
 static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
 {
@@ -3308,6 +3479,8 @@
 
 int proto_register(struct proto *prot, int alloc_slab)
 {
+	int ret = -ENOBUFS;
+
 	if (alloc_slab) {
 		prot->slab = kmem_cache_create_usercopy(prot->name,
 					prot->obj_size, 0,
@@ -3339,25 +3512,32 @@
 					prot->slab_flags,
 					NULL);
 			if (prot->twsk_prot->twsk_slab == NULL)
-				goto out_free_timewait_sock_slab_name;
+				goto out_free_timewait_sock_slab;
 		}
 	}
 
 	mutex_lock(&proto_list_mutex);
+	ret = assign_proto_idx(prot);
+	if (ret) {
+		mutex_unlock(&proto_list_mutex);
+		goto out_free_timewait_sock_slab;
+	}
 	list_add(&prot->node, &proto_list);
-	assign_proto_idx(prot);
 	mutex_unlock(&proto_list_mutex);
-	return 0;
+	return ret;
 
-out_free_timewait_sock_slab_name:
-	kfree(prot->twsk_prot->twsk_slab_name);
+out_free_timewait_sock_slab:
+	if (alloc_slab && prot->twsk_prot)
+		tw_prot_cleanup(prot->twsk_prot);
out_free_request_sock_slab:
-	req_prot_cleanup(prot->rsk_prot);
+	if (alloc_slab) {
+		req_prot_cleanup(prot->rsk_prot);
 
-	kmem_cache_destroy(prot->slab);
-	prot->slab = NULL;
+		kmem_cache_destroy(prot->slab);
+		prot->slab = NULL;
+	}
 out:
-	return -ENOBUFS;
+	return ret;
 }
 EXPORT_SYMBOL(proto_register);
 
@@ -3372,12 +3552,7 @@
 	prot->slab = NULL;
 
 	req_prot_cleanup(prot->rsk_prot);
-
-	if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
-		kmem_cache_destroy(prot->twsk_prot->twsk_slab);
-		kfree(prot->twsk_prot->twsk_slab_name);
-		prot->twsk_prot->twsk_slab = NULL;
-	}
+	tw_prot_cleanup(prot->twsk_prot);
 }
 EXPORT_SYMBOL(proto_unregister);
 
@@ -3394,6 +3569,7 @@
 #ifdef CONFIG_INET
 	if (family == AF_INET &&
 	    protocol != IPPROTO_RAW &&
+	    protocol < MAX_INET_PROTOS &&
 	    !rcu_access_pointer(inet_protos[protocol]))
 		return -ENOENT;
 #endif
@@ -3431,7 +3607,7 @@
 	return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
 }
 
-static char *sock_prot_memory_pressure(struct proto *proto)
+static const char *sock_prot_memory_pressure(struct proto *proto)
 {
 	return proto->memory_pressure != NULL ?
 	       proto_memory_pressure(proto) ? "yes" : "no" : "NI";
@@ -3535,3 +3711,11 @@
 }
 EXPORT_SYMBOL(sk_busy_loop_end);
 #endif /* CONFIG_NET_RX_BUSY_POLL */
+
+int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len)
+{
+	if (!sk->sk_prot->bind_add)
+		return -EOPNOTSUPP;
+	return sk->sk_prot->bind_add(sk, addr, addr_len);
+}
+EXPORT_SYMBOL(sock_bind_add);
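
Note: sock_bind_add() gives in-kernel callers a uniform way to add an address to an already-bound socket when the protocol implements ->bind_add (SCTP multihoming is the intended user); protocols without the hook report -EOPNOTSUPP.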